From 82ad759143ed77673db0d93d53c1cde7b99917ee Mon Sep 17 00:00:00 2001 From: Philipp Puschmann Date: Wed, 27 Feb 2019 16:17:33 +0100 Subject: [PATCH 0001/1861] ASoC: tlv320aic3x: fix reset gpio reference counting This patch fixes a bug that prevents freeing the reset gpio on unloading the module. aic3x_i2c_probe is called when loading the module and it calls list_add with a probably uninitialized list entry aic3x->list (next = prev = NULL)). So even if list_del is called it does nothing and in the end the gpio_reset is not freed. Then a repeated module probing fails silently because gpio_request fails. When moving INIT_LIST_HEAD to aic3x_i2c_probe we also have to move list_del to aic3x_i2c_remove because aic3x_remove may be called multiple times without aic3x_i2c_remove being called which leads to a NULL pointer dereference. Signed-off-by: Philipp Puschmann Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic3x.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 6aa0edf8c5ef9..cea3ebecdb12b 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1609,7 +1609,6 @@ static int aic3x_probe(struct snd_soc_component *component) struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component); int ret, i; - INIT_LIST_HEAD(&aic3x->list); aic3x->component = component; for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) { @@ -1692,7 +1691,6 @@ static void aic3x_remove(struct snd_soc_component *component) struct aic3x_priv *aic3x = snd_soc_component_get_drvdata(component); int i; - list_del(&aic3x->list); for (i = 0; i < ARRAY_SIZE(aic3x->supplies); i++) regulator_unregister_notifier(aic3x->supplies[i].consumer, &aic3x->disable_nb[i].nb); @@ -1890,6 +1888,7 @@ static int aic3x_i2c_probe(struct i2c_client *i2c, if (ret != 0) goto err_gpio; + INIT_LIST_HEAD(&aic3x->list); list_add(&aic3x->list, &reset_list); return 0; @@ -1906,6 +1905,8 @@ static int aic3x_i2c_remove(struct i2c_client *client) { struct aic3x_priv *aic3x = i2c_get_clientdata(client); + list_del(&aic3x->list); + if (gpio_is_valid(aic3x->gpio_reset) && !aic3x_is_shared_reset(aic3x)) { gpio_set_value(aic3x->gpio_reset, 0); -- GitLab From f060f46f09bb920d1e0d436d654996033b856e26 Mon Sep 17 00:00:00 2001 From: KaiChieh Chuang Date: Wed, 27 Feb 2019 09:30:44 +0800 Subject: [PATCH 0002/1861] ASoC: mediatek: btcvsd add loopback add direct loopback path from rx to tx Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown --- sound/soc/mediatek/common/mtk-btcvsd.c | 69 +++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/sound/soc/mediatek/common/mtk-btcvsd.c b/sound/soc/mediatek/common/mtk-btcvsd.c index 1b8bcdaf02d11..9a163d7064d17 100644 --- a/sound/soc/mediatek/common/mtk-btcvsd.c +++ b/sound/soc/mediatek/common/mtk-btcvsd.c @@ -49,6 +49,7 @@ enum bt_sco_state { BT_SCO_STATE_IDLE, BT_SCO_STATE_RUNNING, BT_SCO_STATE_ENDING, + BT_SCO_STATE_LOOPBACK, }; enum bt_sco_direct { @@ -486,7 +487,8 @@ static irqreturn_t mtk_btcvsd_snd_irq_handler(int irq_id, void *dev) if (bt->rx->state != BT_SCO_STATE_RUNNING && bt->rx->state != BT_SCO_STATE_ENDING && bt->tx->state != BT_SCO_STATE_RUNNING && - bt->tx->state != BT_SCO_STATE_ENDING) { + bt->tx->state != BT_SCO_STATE_ENDING && + bt->tx->state != BT_SCO_STATE_LOOPBACK) { dev_warn(bt->dev, "%s(), in idle state: rx->state: %d, tx->state: %d\n", __func__, bt->rx->state, bt->tx->state); goto irq_handler_exit; @@ -512,6 +514,42 @@ static irqreturn_t 
mtk_btcvsd_snd_irq_handler(int irq_id, void *dev) buf_cnt_tx = btsco_packet_info[packet_type][2]; buf_cnt_rx = btsco_packet_info[packet_type][3]; + if (bt->tx->state == BT_SCO_STATE_LOOPBACK) { + u8 *src, *dst; + unsigned long connsys_addr_rx, ap_addr_rx; + unsigned long connsys_addr_tx, ap_addr_tx; + + connsys_addr_rx = *bt->bt_reg_pkt_r; + ap_addr_rx = (unsigned long)bt->bt_sram_bank2_base + + (connsys_addr_rx & 0xFFFF); + + connsys_addr_tx = *bt->bt_reg_pkt_w; + ap_addr_tx = (unsigned long)bt->bt_sram_bank2_base + + (connsys_addr_tx & 0xFFFF); + + if (connsys_addr_tx == 0xdeadfeed || + connsys_addr_rx == 0xdeadfeed) { + /* bt return 0xdeadfeed if read reg during bt sleep */ + dev_warn(bt->dev, "%s(), connsys_addr_tx == 0xdeadfeed\n", + __func__); + goto irq_handler_exit; + } + + src = (u8 *)ap_addr_rx; + dst = (u8 *)ap_addr_tx; + + mtk_btcvsd_snd_data_transfer(BT_SCO_DIRECT_BT2ARM, src, + bt->tx->temp_packet_buf, + packet_length, + packet_num); + mtk_btcvsd_snd_data_transfer(BT_SCO_DIRECT_ARM2BT, + bt->tx->temp_packet_buf, dst, + packet_length, + packet_num); + bt->rx->rw_cnt++; + bt->tx->rw_cnt++; + } + if (bt->rx->state == BT_SCO_STATE_RUNNING || bt->rx->state == BT_SCO_STATE_ENDING) { if (bt->rx->xrun) { @@ -1067,6 +1105,33 @@ static int btcvsd_band_set(struct snd_kcontrol *kcontrol, return 0; } +static int btcvsd_loopback_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(cmpnt); + bool lpbk_en = bt->tx->state == BT_SCO_STATE_LOOPBACK; + + ucontrol->value.integer.value[0] = lpbk_en; + return 0; +} + +static int btcvsd_loopback_set(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(cmpnt); + + if (ucontrol->value.integer.value[0]) { + mtk_btcvsd_snd_set_state(bt, bt->tx, BT_SCO_STATE_LOOPBACK); + mtk_btcvsd_snd_set_state(bt, bt->rx, BT_SCO_STATE_LOOPBACK); + } else { + mtk_btcvsd_snd_set_state(bt, bt->tx, BT_SCO_STATE_RUNNING); + mtk_btcvsd_snd_set_state(bt, bt->rx, BT_SCO_STATE_RUNNING); + } + return 0; +} + static int btcvsd_tx_mute_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -1202,6 +1267,8 @@ static int btcvsd_tx_timestamp_get(struct snd_kcontrol *kcontrol, static const struct snd_kcontrol_new mtk_btcvsd_snd_controls[] = { SOC_ENUM_EXT("BTCVSD Band", btcvsd_enum[0], btcvsd_band_get, btcvsd_band_set), + SOC_SINGLE_BOOL_EXT("BTCVSD Loopback Switch", 0, + btcvsd_loopback_get, btcvsd_loopback_set), SOC_SINGLE_BOOL_EXT("BTCVSD Tx Mute Switch", 0, btcvsd_tx_mute_get, btcvsd_tx_mute_set), SOC_SINGLE_BOOL_EXT("BTCVSD Tx Irq Received Switch", 0, -- GitLab From b805d78d300bcf2c83d6df7da0c818b0fee41427 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Feb 2019 15:18:59 +0800 Subject: [PATCH 0003/1861] xfrm: policy: Fix out-of-bound array accesses in __xfrm_policy_unlink UBSAN report this: UBSAN: Undefined behaviour in net/xfrm/xfrm_policy.c:1289:24 index 6 is out of range for type 'unsigned int [6]' CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.4.162-514.55.6.9.x86_64+ #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 0000000000000000 1466cf39b41b23c9 ffff8801f6b07a58 ffffffff81cb35f4 0000000041b58ab3 ffffffff83230f9c ffffffff81cb34e0 ffff8801f6b07a80 ffff8801f6b07a20 1466cf39b41b23c9 ffffffff851706e0 
ffff8801f6b07ae8 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x114/0x1a0 lib/dump_stack.c:51 [] ubsan_epilogue+0x12/0x8f lib/ubsan.c:164 [] __ubsan_handle_out_of_bounds+0x16e/0x1b2 lib/ubsan.c:382 [] __xfrm_policy_unlink+0x3dd/0x5b0 net/xfrm/xfrm_policy.c:1289 [] xfrm_policy_delete+0x52/0xb0 net/xfrm/xfrm_policy.c:1309 [] xfrm_policy_timer+0x30b/0x590 net/xfrm/xfrm_policy.c:243 [] call_timer_fn+0x237/0x990 kernel/time/timer.c:1144 [] __run_timers kernel/time/timer.c:1218 [inline] [] run_timer_softirq+0x6ce/0xb80 kernel/time/timer.c:1401 [] __do_softirq+0x299/0xe10 kernel/softirq.c:273 [] invoke_softirq kernel/softirq.c:350 [inline] [] irq_exit+0x216/0x2c0 kernel/softirq.c:391 [] exiting_irq arch/x86/include/asm/apic.h:652 [inline] [] smp_apic_timer_interrupt+0x8b/0xc0 arch/x86/kernel/apic/apic.c:926 [] apic_timer_interrupt+0xa5/0xb0 arch/x86/entry/entry_64.S:735 [] ? native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:52 [] arch_safe_halt arch/x86/include/asm/paravirt.h:111 [inline] [] default_idle+0x27/0x430 arch/x86/kernel/process.c:446 [] arch_cpu_idle+0x15/0x20 arch/x86/kernel/process.c:437 [] default_idle_call+0x53/0x90 kernel/sched/idle.c:92 [] cpuidle_idle_call kernel/sched/idle.c:156 [inline] [] cpu_idle_loop kernel/sched/idle.c:251 [inline] [] cpu_startup_entry+0x60d/0x9a0 kernel/sched/idle.c:299 [] start_secondary+0x3c9/0x560 arch/x86/kernel/smpboot.c:245 The issue is triggered as this: xfrm_add_policy -->verify_newpolicy_info //check the index provided by user with XFRM_POLICY_MAX //In my case, the index is 0x6E6BB6, so it pass the check. -->xfrm_policy_construct //copy the user's policy and set xfrm_policy_timer -->xfrm_policy_insert --> __xfrm_policy_link //use the orgin dir, in my case is 2 --> xfrm_gen_index //generate policy index, there is 0x6E6BB6 then xfrm_policy_timer be fired xfrm_policy_timer --> xfrm_policy_id2dir //get dir from (policy index & 7), in my case is 6 --> xfrm_policy_delete --> __xfrm_policy_unlink //access policy_count[dir], trigger out of range access Add xfrm_policy_id2dir check in verify_newpolicy_info, make sure the computed dir is valid, to fix the issue. Reported-by: Hulk Robot Fixes: e682adf021be ("xfrm: Try to honor policy index if it's supplied by user") Signed-off-by: YueHaibing Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a131f9ff979e1..8d4d52fd457b2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL; return 0; -- GitLab From 2e95f984aae4cf0608d0ba2189c756f2bd50b44a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Feb 2019 15:30:34 +0000 Subject: [PATCH 0004/1861] ASoC: hdmi-codec: fix S/PDIF DAI When using the S/PDIF DAI, there is no requirement to call snd_soc_dai_set_fmt() as there is no DAI format definition that defines S/PDIF. In any case, S/PDIF does not have separate clocks, this is embedded into the data stream. Consequently, when attempting to use TDA998x in S/PDIF mode, the attempt to configure TDA998x via the hw_params callback fails as the hdmi_codec_daifmt is left initialised to zero. 
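For illustration, a condensed view of the old flow (not verbatim; the full hunk appears below): the only place an S/PDIF format was ever recorded was inside set_fmt(), which S/PDIF-only cards legitimately never call:

	/* old hdmi_codec_set_fmt(), condensed */
	struct hdmi_codec_daifmt cf = { 0 };

	if (dai->id == DAI_ID_SPDIF)
		cf.fmt = HDMI_SPDIF;
	else
		/* decode SND_SOC_DAIFMT_* flags into cf */;

	hcp->daifmt[dai->id] = cf;	/* never reached without a set_fmt() call */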
Since the S/PDIF DAI will only be used by S/PDIF, prepare the hdmi_codec_daifmt structure for this format. Signed-off-by: Russell King Reviewed-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 118 +++++++++++++++++----------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index e5b6769b97977..d5f73c8372817 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -529,73 +529,71 @@ static int hdmi_codec_set_fmt(struct snd_soc_dai *dai, { struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); struct hdmi_codec_daifmt cf = { 0 }; - int ret = 0; dev_dbg(dai->dev, "%s()\n", __func__); - if (dai->id == DAI_ID_SPDIF) { - cf.fmt = HDMI_SPDIF; - } else { - switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { - case SND_SOC_DAIFMT_CBM_CFM: - cf.bit_clk_master = 1; - cf.frame_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBS_CFM: - cf.frame_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBM_CFS: - cf.bit_clk_master = 1; - break; - case SND_SOC_DAIFMT_CBS_CFS: - break; - default: - return -EINVAL; - } + if (dai->id == DAI_ID_SPDIF) + return 0; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + cf.bit_clk_master = 1; + cf.frame_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBS_CFM: + cf.frame_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBM_CFS: + cf.bit_clk_master = 1; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - break; - case SND_SOC_DAIFMT_NB_IF: - cf.frame_clk_inv = 1; - break; - case SND_SOC_DAIFMT_IB_NF: - cf.bit_clk_inv = 1; - break; - case SND_SOC_DAIFMT_IB_IF: - cf.frame_clk_inv = 1; - cf.bit_clk_inv = 1; - break; - } + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_NB_IF: + cf.frame_clk_inv = 1; + break; + case SND_SOC_DAIFMT_IB_NF: + cf.bit_clk_inv = 1; + break; + case SND_SOC_DAIFMT_IB_IF: + cf.frame_clk_inv = 1; + cf.bit_clk_inv = 1; + break; + } - switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { - case SND_SOC_DAIFMT_I2S: - cf.fmt = HDMI_I2S; - break; - case SND_SOC_DAIFMT_DSP_A: - cf.fmt = HDMI_DSP_A; - break; - case SND_SOC_DAIFMT_DSP_B: - cf.fmt = HDMI_DSP_B; - break; - case SND_SOC_DAIFMT_RIGHT_J: - cf.fmt = HDMI_RIGHT_J; - break; - case SND_SOC_DAIFMT_LEFT_J: - cf.fmt = HDMI_LEFT_J; - break; - case SND_SOC_DAIFMT_AC97: - cf.fmt = HDMI_AC97; - break; - default: - dev_err(dai->dev, "Invalid DAI interface format\n"); - return -EINVAL; - } + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + cf.fmt = HDMI_I2S; + break; + case SND_SOC_DAIFMT_DSP_A: + cf.fmt = HDMI_DSP_A; + break; + case SND_SOC_DAIFMT_DSP_B: + cf.fmt = HDMI_DSP_B; + break; + case SND_SOC_DAIFMT_RIGHT_J: + cf.fmt = HDMI_RIGHT_J; + break; + case SND_SOC_DAIFMT_LEFT_J: + cf.fmt = HDMI_LEFT_J; + break; + case SND_SOC_DAIFMT_AC97: + cf.fmt = HDMI_AC97; + break; + default: + dev_err(dai->dev, "Invalid DAI interface format\n"); + return -EINVAL; } hcp->daifmt[dai->id] = cf; - return ret; + return 0; } static int hdmi_codec_digital_mute(struct snd_soc_dai *dai, int mute) @@ -792,8 +790,10 @@ static int hdmi_codec_probe(struct platform_device *pdev) i++; } - if (hcd->spdif) + if (hcd->spdif) { hcp->daidrv[i] = hdmi_spdif_dai; + hcp->daifmt[DAI_ID_SPDIF].fmt = HDMI_SPDIF; + } dev_set_drvdata(dev, hcp); -- GitLab From 102cefc8e879b707be0024fdc7bce1deeb359a5f Mon Sep 17 00:00:00 2001 From: 
"Gustavo A. R. Silva" Date: Fri, 1 Mar 2019 14:43:10 -0600 Subject: [PATCH 0005/1861] ASoC: ab8500: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: In file included from sound/soc/codecs/ab8500-codec.c:24: sound/soc/codecs/ab8500-codec.c: In function ‘ab8500_codec_set_dai_fmt’: ./include/linux/device.h:1485:2: warning: this statement may fall through [-Wimplicit-fallthrough=] _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sound/soc/codecs/ab8500-codec.c:2129:3: note: in expansion of macro ‘dev_err’ dev_err(dai->component->dev, ^~~~~~~ sound/soc/codecs/ab8500-codec.c:2132:2: note: here default: ^~~~~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown --- sound/soc/codecs/ab8500-codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index 03bbbcd3b6c11..87616b126018b 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -2129,6 +2129,7 @@ static int ab8500_codec_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) dev_err(dai->component->dev, "%s: ERROR: The device is either a master or a slave.\n", __func__); + /* fall through */ default: dev_err(dai->component->dev, "%s: ERROR: Unsupporter master mask 0x%x\n", -- GitLab From 5f8a1000c3e630c3ac06f1d664eeaa755bce8823 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:21 +0100 Subject: [PATCH 0006/1861] ASoC: stm32: sai: fix iec958 controls indexation Allow indexation of sai iec958 controls according to device id. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index f9297228c41ce..506360b7bc010 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -100,7 +100,7 @@ * @slot_mask: rx or tx active slots mask. set at init or at runtime * @data_size: PCM data width. corresponds to PCM substream width. * @spdif_frm_cnt: S/PDIF playback frame counter - * @snd_aes_iec958: iec958 data + * @iec958: iec958 data * @ctrl_lock: control lock */ struct stm32_sai_sub_data { @@ -1068,11 +1068,12 @@ static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev); + struct snd_kcontrol_new knew = iec958_ctls; if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { dev_dbg(&sai->pdev->dev, "%s: register iec controls", __func__); - return snd_ctl_add(rtd->pcm->card, - snd_ctl_new1(&iec958_ctls, sai)); + knew.device = rtd->pcm->device; + return snd_ctl_add(rtd->pcm->card, snd_ctl_new1(&knew, sai)); } return 0; -- GitLab From b8468192971807c43a80d6e2c41f83141cb7b211 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:22 +0100 Subject: [PATCH 0007/1861] ASoC: stm32: sai: fix exposed capabilities in spdif mode Change capabilities exposed in SAI S/PDIF mode, to match actually supported formats. In S/PDIF mode only 32 bits stereo is supported. 
Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 506360b7bc010..e418f446e03be 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -682,6 +682,14 @@ static int stm32_sai_startup(struct snd_pcm_substream *substream, sai->substream = substream; + if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + snd_pcm_hw_constraint_mask64(substream->runtime, + SNDRV_PCM_HW_PARAM_FORMAT, + SNDRV_PCM_FMTBIT_S32_LE); + snd_pcm_hw_constraint_single(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, 2); + } + ret = clk_prepare_enable(sai->sai_ck); if (ret < 0) { dev_err(cpu_dai->dev, "Failed to enable clock: %d\n", ret); -- GitLab From 26f98e82dd49b7c3cc5ef0edd882aa732a62b672 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:23 +0100 Subject: [PATCH 0008/1861] ASoC: stm32: sai: fix race condition in irq handler When snd_pcm_stop_xrun() is called in interrupt routine, substream context may have already been released. Add protection on substream context. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index e418f446e03be..cad415e03b5ef 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -102,6 +102,7 @@ * @spdif_frm_cnt: S/PDIF playback frame counter * @iec958: iec958 data * @ctrl_lock: control lock + * @irq_lock: prevent race condition with IRQ */ struct stm32_sai_sub_data { struct platform_device *pdev; @@ -133,6 +134,7 @@ struct stm32_sai_sub_data { unsigned int spdif_frm_cnt; struct snd_aes_iec958 iec958; struct mutex ctrl_lock; /* protect resources accessed by controls */ + spinlock_t irq_lock; /* used to prevent race condition with IRQ */ }; enum stm32_sai_fifo_th { @@ -474,8 +476,10 @@ static irqreturn_t stm32_sai_isr(int irq, void *devid) status = SNDRV_PCM_STATE_XRUN; } - if (status != SNDRV_PCM_STATE_RUNNING) + spin_lock(&sai->irq_lock); + if (status != SNDRV_PCM_STATE_RUNNING && sai->substream) snd_pcm_stop_xrun(sai->substream); + spin_unlock(&sai->irq_lock); return IRQ_HANDLED; } @@ -679,8 +683,11 @@ static int stm32_sai_startup(struct snd_pcm_substream *substream, { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); int imr, cr2, ret; + unsigned long flags; + spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = substream; + spin_unlock_irqrestore(&sai->irq_lock, flags); if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) { snd_pcm_hw_constraint_mask64(substream->runtime, @@ -1059,6 +1066,7 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0); @@ -1069,7 +1077,9 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, clk_rate_exclusive_put(sai->sai_mclk); + spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = NULL; + spin_unlock_irqrestore(&sai->irq_lock, flags); } static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, @@ -1433,6 +1443,7 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) sai->pdev = pdev; mutex_init(&sai->ctrl_lock); + spin_lock_init(&sai->irq_lock); platform_set_drvdata(pdev, sai); sai->pdata = 
dev_get_drvdata(pdev->dev.parent); -- GitLab From 71d9537fada47762a1a1b33a8a1f95a92d7edc11 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:24 +0100 Subject: [PATCH 0009/1861] ASoC: stm32: sai: fix oversampling mode Set OSR bit if mclk/fs ratio is 512. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index cad415e03b5ef..cb658463ccd1a 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -913,7 +913,7 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, struct snd_pcm_hw_params *params) { struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); - int div = 0; + int div = 0, cr1 = 0; int sai_clk_rate, mclk_ratio, den; unsigned int rate = params_rate(params); @@ -958,13 +958,19 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, } else { if (sai->mclk_rate) { mclk_ratio = sai->mclk_rate / rate; - if ((mclk_ratio != 512) && - (mclk_ratio != 256)) { + if (mclk_ratio == 512) { + cr1 = SAI_XCR1_OSR; + } else if (mclk_ratio != 256) { dev_err(cpu_dai->dev, "Wrong mclk ratio %d\n", mclk_ratio); return -EINVAL; } + + regmap_update_bits(sai->regmap, + STM_SAI_CR1_REGX, + SAI_XCR1_OSR, cr1); + div = stm32_sai_get_clk_div(sai, sai_clk_rate, sai->mclk_rate); if (div < 0) -- GitLab From d4180b4c02e7b04b8479f6237b2bd98b4c5fd19c Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 28 Feb 2019 14:19:25 +0100 Subject: [PATCH 0010/1861] ASoC: stm32: sai: fix set_sync service Add error check on set_sync function return. Add of_node_put() as of_get_parent() takes a reference which has to be released. 
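The refcount pattern being restored, as a minimal sketch (hypothetical helper name, not the driver's code):

	struct device_node *np_provider = of_get_parent(np);	/* takes a reference */
	if (!np_provider)
		return -ENODEV;

	ret = configure_sync(np_provider);	/* hypothetical helper */
	of_node_put(np_provider);	/* release on success and on every error path */
	return ret;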
Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai.c | 8 +++++--- sound/soc/stm/stm32_sai_sub.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c index 14c9591aae426..d68d62f12df56 100644 --- a/sound/soc/stm/stm32_sai.c +++ b/sound/soc/stm/stm32_sai.c @@ -105,6 +105,7 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client, if (!pdev) { dev_err(&sai_client->pdev->dev, "Device not found for node %pOFn\n", np_provider); + of_node_put(np_provider); return -ENODEV; } @@ -113,19 +114,20 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client, dev_err(&sai_client->pdev->dev, "SAI sync provider data not found\n"); ret = -EINVAL; - goto out_put_dev; + goto error; } /* Configure sync client */ ret = stm32_sai_sync_conf_client(sai_client, synci); if (ret < 0) - goto out_put_dev; + goto error; /* Configure sync provider */ ret = stm32_sai_sync_conf_provider(sai_provider, synco); -out_put_dev: +error: put_device(&pdev->dev); + of_node_put(np_provider); return ret; } diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index cb658463ccd1a..55d802f51c155 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1106,7 +1106,7 @@ static int stm32_sai_pcm_new(struct snd_soc_pcm_runtime *rtd, static int stm32_sai_dai_probe(struct snd_soc_dai *cpu_dai) { struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev); - int cr1 = 0, cr1_mask; + int cr1 = 0, cr1_mask, ret; sai->cpu_dai = cpu_dai; @@ -1136,8 +1136,10 @@ static int stm32_sai_dai_probe(struct snd_soc_dai *cpu_dai) /* Configure synchronization */ if (sai->sync == SAI_SYNC_EXTERNAL) { /* Configure synchro client and provider */ - sai->pdata->set_sync(sai->pdata, sai->np_sync_provider, - sai->synco, sai->synci); + ret = sai->pdata->set_sync(sai->pdata, sai->np_sync_provider, + sai->synco, sai->synci); + if (ret) + return ret; } cr1_mask |= SAI_XCR1_SYNCEN_MASK; -- GitLab From bbf62563d8622434c761cb96569c132467f88597 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 28 Feb 2019 15:30:40 +0000 Subject: [PATCH 0011/1861] ASoC: hdmi-codec: avoid limiting params->msbits in hw_params() Limiting the value of the passed in params->msbits in the hw_params() callback is redundant on three counts: 1. We already specify in the DAI driver that we can only handle up to 24 bits. This means msbits will be limited to 24 via the ALSA constraints imposed by the ASoC core, unless we have multiple codecs that can handle more bits. 2. Nothing in our hw_params() implementation uses this value. 3. The copy of the params that we are passed by the ASoC core never reads back the msbits value. Consequently, this code is unnecessary and does nothing useful. Remove it. 
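Point 1 is carried by the DAI driver's stream description; a sketch of how that constraint normally arrives (field per struct snd_soc_pcm_stream, value as claimed above):

	.playback = {
		.stream_name	= "Playback",
		.sig_bits	= 24,	/* ASoC core turns this into an msbits constraint */
		/* ... */
	},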
Signed-off-by: Russell King Reviewed-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index d5f73c8372817..35df73e42cbc5 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -484,9 +484,6 @@ static int hdmi_codec_hw_params(struct snd_pcm_substream *substream, params_width(params), params_rate(params), params_channels(params)); - if (params_width(params) > 24) - params->msbits = 24; - ret = snd_pcm_create_iec958_consumer_hw_params(params, hp.iec.status, sizeof(hp.iec.status)); if (ret < 0) { -- GitLab From c342febcde452f817cbd3896dc40953ab17c309d Mon Sep 17 00:00:00 2001 From: Jonathan Hunter Date: Mon, 4 Mar 2019 13:31:14 +0000 Subject: [PATCH 0012/1861] ASoC: soc-core: Fix probe deferral following prelink failure Commit 78a24e10cd94 ("ASoC: soc-core: clear platform pointers on error") re-worked the clean-up of any platform pointers that may have been initialised by the function snd_soc_init_platform(). This commit missed one error path where if any of the prelinks for a soundcard failed to initialise, then these platform pointers would not be cleaned-up. This then prevents the soundcard from being initialised following a probe deferral when any of the soundcard prelinks cannot be found. Fix this by ensuring that soc_cleanup_platform() is called when initialising the soundcard prelinks fails. Fixes: 78a24e10cd94 ("ASoC: soc-core: clear platform pointers on error") Signed-off-by: Jonathan Hunter Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 93d316d5bf8e3..5a5764dba1479 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2797,6 +2797,7 @@ int snd_soc_register_card(struct snd_soc_card *card) ret = soc_init_dai_link(card, link); if (ret) { + soc_cleanup_platform(card); dev_err(card->dev, "ASoC: failed to init link %s\n", link->name); mutex_unlock(&client_mutex); -- GitLab From 6ed69184ed9c43873b8a1ee721e3bf3c08c2c6be Mon Sep 17 00:00:00 2001 From: Myungho Jung Date: Thu, 7 Mar 2019 10:23:08 +0900 Subject: [PATCH 0013/1861] xfrm: Reset secpath in xfrm failure In esp4_gro_receive() and esp6_gro_receive(), secpath can be allocated without adding xfrm state to xvec. Then, sp->xvec[sp->len - 1] would fail and result in dereferencing invalid pointer in esp4_gso_segment() and esp6_gso_segment(). Reset secpath if xfrm function returns error. 
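A minimal sketch of the broken sequence (condensed; error-handling details omitted):

	sp = secpath_set(skb);			/* allocated with sp->len == 0 */
	x = xfrm_state_lookup(...);
	if (!x)
		goto out;			/* old code: secpath stays attached */

	/* later, in esp4_gso_segment()/esp6_gso_segment(): */
	x = sp->xvec[sp->len - 1];		/* len still 0 -> xvec[-1] */

The new out_reset label calls secpath_reset(skb) so a secpath without any state never reaches the GSO path.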
Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Reported-by: syzbot+b69368fd933c6c592f4c@syzkaller.appspotmail.com Signed-off-by: Myungho Jung Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 8 +++++--- net/ipv6/esp6_offload.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 8756e0e790d2a..d3170a8001b2a 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -52,13 +52,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -66,7 +66,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -82,6 +82,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d46b4eb645c2e..cb99f6fb79b79 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET6); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; -- GitLab From f10e0010fae8174dc20bdc872bcaa85baa925cb7 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Wed, 6 Mar 2019 20:54:08 -0500 Subject: [PATCH 0014/1861] net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm For rcu protected pointers, we'd better add '__rcu' for them. Once added '__rcu' tag for rcu protected pointer, the sparse tool reports warnings. net/xfrm/xfrm_user.c:1198:39: sparse: expected struct sock *sk net/xfrm/xfrm_user.c:1198:39: sparse: got struct sock [noderef] *nlsk [...] So introduce a new wrapper function of nlmsg_unicast to handle type conversions. This patch also fixes a direct access of a rcu protected socket. 
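The underlying rule, as a short sketch: once the field is annotated __rcu, sparse flags any plain load, and readers must use the RCU accessor inside a read-side critical section:

	struct sock __rcu *nlsk;		/* writers publish via rcu_assign_pointer() */

	sk = net->xfrm.nlsk;			/* plain load: sparse address-space warning */
	sk = rcu_dereference(net->xfrm.nlsk);	/* correct under rcu_read_lock() */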
Fixes: be33690d8fcf("[XFRM]: Fix aevent related crash") Signed-off-by: Su Yanjun Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 30 +++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 59f45b1e9dac0..d2a36fb9f92a4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -57,7 +57,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8d4d52fd457b2..9445898323438 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1071,6 +1071,22 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } +/* A similar wrapper like xfrm_nlmsg_multicast checking that nlsk is still + * available. + */ +static inline int xfrm_nlmsg_unicast(struct net *net, struct sk_buff *skb, + u32 pid) +{ + struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + + if (!nlsk) { + kfree_skb(skb); + return -EPIPE; + } + + return nlmsg_unicast(nlsk, skb, pid); +} + static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -1195,7 +1211,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return xfrm_nlmsg_unicast(net, r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1254,7 +1270,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); + return xfrm_nlmsg_unicast(net, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1274,7 +1290,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1337,7 +1353,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1903,8 +1919,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, resp_skb, + NETLINK_CB(skb).portid); } } else { xfrm_audit_policy_delete(xp, err ? 
0 : 1, true); @@ -2062,7 +2078,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); + err = xfrm_nlmsg_unicast(net, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; -- GitLab From 0a8a29be499cbb67df79370aaf5109085509feb8 Mon Sep 17 00:00:00 2001 From: Leonard Pollak Date: Wed, 13 Feb 2019 11:19:52 +0100 Subject: [PATCH 0015/1861] Staging: iio: meter: fixed typo This patch fixes an obvious typo, which will cause erroneously returning the Peak Voltage instead of the Peak Current. Signed-off-by: Leonard Pollak Cc: Acked-by: Michael Hennerich Signed-off-by: Jonathan Cameron --- drivers/staging/iio/meter/ade7854.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/meter/ade7854.c b/drivers/staging/iio/meter/ade7854.c index 029c3bf42d4d9..07774c000c5a6 100644 --- a/drivers/staging/iio/meter/ade7854.c +++ b/drivers/staging/iio/meter/ade7854.c @@ -269,7 +269,7 @@ static IIO_DEV_ATTR_VPEAK(0644, static IIO_DEV_ATTR_IPEAK(0644, ade7854_read_32bit, ade7854_write_32bit, - ADE7854_VPEAK); + ADE7854_IPEAK); static IIO_DEV_ATTR_APHCAL(0644, ade7854_read_16bit, ade7854_write_16bit, -- GitLab From 40a7198a4a01037003c7ca714f0d048a61e729ac Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 13 Feb 2019 08:41:47 +0100 Subject: [PATCH 0016/1861] iio/gyro/bmg160: Use millidegrees for temperature scale Standard unit for temperature is millidegrees Celcius, whereas this driver was reporting in degrees. Fix the scale factor in the driver. Signed-off-by: Mike Looijmans Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 63ca31628a93a..92c07ab826eb3 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -582,11 +582,10 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: return bmg160_get_filter(data, val); case IIO_CHAN_INFO_SCALE: - *val = 0; switch (chan->type) { case IIO_TEMP: - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 500; + return IIO_VAL_INT; case IIO_ANGL_VEL: { int i; @@ -594,6 +593,7 @@ static int bmg160_read_raw(struct iio_dev *indio_dev, for (i = 0; i < ARRAY_SIZE(bmg160_scale_table); ++i) { if (bmg160_scale_table[i].dps_range == data->dps_range) { + *val = 0; *val2 = bmg160_scale_table[i].scale; return IIO_VAL_INT_PLUS_MICRO; } -- GitLab From 7ce0f216221856a17fc4934b39284678a5fef2e9 Mon Sep 17 00:00:00 2001 From: Mircea Caprioru Date: Wed, 20 Feb 2019 13:08:20 +0200 Subject: [PATCH 0017/1861] staging: iio: ad7192: Fix ad7193 channel address This patch fixes the differential channels addresses for the ad7193. 
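The channel-select field uses one bit per input (compare the single-ended AD7192_CH_* definitions in the hunk below, which are plain BIT() values), so the corrected defines move each differential pair onto its own bit:

	AIN1(+)-AIN2(-):  0x000 -> 0x001	/* old value selected no bit at all */
	AIN3(+)-AIN4(-):  0x001 -> 0x002
	AIN5(+)-AIN6(-):  0x002 -> 0x004
	AIN7(+)-AIN8(-):  0x004 -> 0x008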
Signed-off-by: Mircea Caprioru Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7192.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index acdbc07fd2592..2fc8bc22b57ba 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -109,10 +109,10 @@ #define AD7192_CH_AIN3 BIT(6) /* AIN3 - AINCOM */ #define AD7192_CH_AIN4 BIT(7) /* AIN4 - AINCOM */ -#define AD7193_CH_AIN1P_AIN2M 0x000 /* AIN1(+) - AIN2(-) */ -#define AD7193_CH_AIN3P_AIN4M 0x001 /* AIN3(+) - AIN4(-) */ -#define AD7193_CH_AIN5P_AIN6M 0x002 /* AIN5(+) - AIN6(-) */ -#define AD7193_CH_AIN7P_AIN8M 0x004 /* AIN7(+) - AIN8(-) */ +#define AD7193_CH_AIN1P_AIN2M 0x001 /* AIN1(+) - AIN2(-) */ +#define AD7193_CH_AIN3P_AIN4M 0x002 /* AIN3(+) - AIN4(-) */ +#define AD7193_CH_AIN5P_AIN6M 0x004 /* AIN5(+) - AIN6(-) */ +#define AD7193_CH_AIN7P_AIN8M 0x008 /* AIN7(+) - AIN8(-) */ #define AD7193_CH_TEMP 0x100 /* Temp senseor */ #define AD7193_CH_AIN2P_AIN2M 0x200 /* AIN2(+) - AIN2(-) */ #define AD7193_CH_AIN1 0x401 /* AIN1 - AINCOM */ -- GitLab From 20ea39ef9f2f911bd01c69519e7d69cfec79fde3 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 20 Feb 2019 17:11:32 +0200 Subject: [PATCH 0018/1861] iio: Fix scan mask selection The trialmask is expected to have all bits set to 0 after allocation. Currently kmalloc_array() is used which does not zero the memory and so random bits are set. This results in random channels being enabled when they shouldn't. Replace kmalloc_array() with kcalloc() which has the same interface but zeros the memory. Note the fix is actually required earlier than the below fixes tag, but will require a manual backport due to move from kmalloc to kmalloc_array. Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Fixes commit 057ac1acdfc4 ("iio: Use kmalloc_array() in iio_scan_mask_set()"). Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index cd5bfe39591bb..dadd921a4a30f 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -320,9 +320,8 @@ static int iio_scan_mask_set(struct iio_dev *indio_dev, const unsigned long *mask; unsigned long *trialmask; - trialmask = kmalloc_array(BITS_TO_LONGS(indio_dev->masklength), - sizeof(*trialmask), - GFP_KERNEL); + trialmask = kcalloc(BITS_TO_LONGS(indio_dev->masklength), + sizeof(*trialmask), GFP_KERNEL); if (trialmask == NULL) return -ENOMEM; if (!indio_dev->masklength) { -- GitLab From 409a51e0a4a5f908763191fae2c29008632eb712 Mon Sep 17 00:00:00 2001 From: Sergey Larin Date: Sat, 2 Mar 2019 19:54:55 +0300 Subject: [PATCH 0019/1861] iio: gyro: mpu3050: fix chip ID reading According to the datasheet, the last bit of CHIP_ID register controls I2C bus, and the first one is unused. Handle this correctly. Note that there are chips out there that have a value such that the id check currently fails. 
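Worked through with the values from the hunk below — bits 0 and 7 must be masked out (one reflects I2C-bus control, the other is unused):

	0x68 & 0x7E = 0x68			/* nominal ID: accepted */
	0x69 & 0x7E = 0110 1001b & 0111 1110b
		    = 0110 1000b = 0x68		/* bit 0 set: now also accepted */

The old test (val != 0x69) instead rejected every chip that reported the plain 0x68 ID.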
Signed-off-by: Sergey Larin Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index 77fac81a3adce..5ddebede31a6f 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -29,7 +29,8 @@ #include "mpu3050.h" -#define MPU3050_CHIP_ID 0x69 +#define MPU3050_CHIP_ID 0x68 +#define MPU3050_CHIP_ID_MASK 0x7E /* * Register map: anything suffixed *_H is a big-endian high byte and always @@ -1176,8 +1177,9 @@ int mpu3050_common_probe(struct device *dev, goto err_power_down; } - if (val != MPU3050_CHIP_ID) { - dev_err(dev, "unsupported chip id %02x\n", (u8)val); + if ((val & MPU3050_CHIP_ID_MASK) != MPU3050_CHIP_ID) { + dev_err(dev, "unsupported chip id %02x\n", + (u8)(val & MPU3050_CHIP_ID_MASK)); ret = -ENODEV; goto err_power_down; } -- GitLab From 09c6bdee51183a575bf7546890c8c137a75a2b44 Mon Sep 17 00:00:00 2001 From: Georg Ottinger Date: Wed, 30 Jan 2019 14:42:02 +0100 Subject: [PATCH 0020/1861] iio: adc: at91: disable adc channel interrupt in timeout case Having a brief look at at91_adc_read_raw() it is obvious that in the case of a timeout the setting of AT91_ADC_CHDR and AT91_ADC_IDR registers is omitted. If 2 different channels are queried we can end up with a situation where two interrupts are enabled, but only one interrupt is cleared in the interrupt handler. Resulting in a interrupt loop and a system hang. Signed-off-by: Georg Ottinger Acked-by: Ludovic Desroches Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91_adc.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 75d2f73582a3d..596841a3c4db7 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -704,23 +704,29 @@ static int at91_adc_read_raw(struct iio_dev *idev, ret = wait_event_interruptible_timeout(st->wq_data_avail, st->done, msecs_to_jiffies(1000)); - if (ret == 0) - ret = -ETIMEDOUT; - if (ret < 0) { - mutex_unlock(&st->lock); - return ret; - } - - *val = st->last_value; + /* Disable interrupts, regardless if adc conversion was + * successful or not + */ at91_adc_writel(st, AT91_ADC_CHDR, AT91_ADC_CH(chan->channel)); at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel)); - st->last_value = 0; - st->done = false; + if (ret > 0) { + /* a valid conversion took place */ + *val = st->last_value; + st->last_value = 0; + st->done = false; + ret = IIO_VAL_INT; + } else if (ret == 0) { + /* conversion timeout */ + dev_err(&idev->dev, "ADC Channel %d timeout.\n", + chan->channel); + ret = -ETIMEDOUT; + } + mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; -- GitLab From 831d2fefdfce757fcea86742220f8cbe7ca51ddd Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sat, 9 Mar 2019 16:59:03 +0000 Subject: [PATCH 0021/1861] iio: chemical: fix missing Kconfig block for sgp30 I clearly messed up applying this patch. Not sure how but the entire Kconfig block is missing. This patch puts it back as it was in the original patch. 
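With the block restored, the symbol can be selected again, e.g. a modular build:

	CONFIG_I2C=y
	CONFIG_SENSIRION_SGP30=m	# builds sgp30.ko; CRC8 is pulled in via 'select'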
Reported-by: Andreas Brauchli Fixes: ce514124161a ("iio: chemical: sgp30: Support Sensirion SGP30/SGPC3 sensors") Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index d5d146e9e3723..5248e180b4421 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -71,6 +71,19 @@ config PMS7003 To compile this driver as a module, choose M here: the module will be called pms7003. +config SENSIRION_SGP30 + tristate "Sensirion SGPxx gas sensors" + depends on I2C + select CRC8 + help + Say Y here to build I2C interface support for the following + Sensirion SGP gas sensors: + * SGP30 gas sensor + * SGPC3 low power gas sensor + + To compile this driver as module, choose M here: the + module will be called sgp30. + config SPS30 tristate "SPS30 particulate matter sensor" depends on I2C -- GitLab From 9436f45dd53595e21566a8c6627411077dfdb776 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 6 Mar 2019 08:31:47 +0100 Subject: [PATCH 0022/1861] iio:chemical:bme680: Fix, report temperature in millidegrees The standard unit for temperature is millidegrees Celcius. Adapt the driver to report in millidegrees instead of degrees. Signed-off-by: Mike Looijmans Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"); Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680_core.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index 70c1fe4366f4c..fefe32b5b69d8 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -583,8 +583,7 @@ static int bme680_gas_config(struct bme680_data *data) return ret; } -static int bme680_read_temp(struct bme680_data *data, - int *val, int *val2) +static int bme680_read_temp(struct bme680_data *data, int *val) { struct device *dev = regmap_get_device(data->regmap); int ret; @@ -617,10 +616,9 @@ static int bme680_read_temp(struct bme680_data *data, * compensate_press/compensate_humid to get compensated * pressure/humidity readings. */ - if (val && val2) { - *val = comp_temp; - *val2 = 100; - return IIO_VAL_FRACTIONAL; + if (val) { + *val = comp_temp * 10; /* Centidegrees to millidegrees */ + return IIO_VAL_INT; } return ret; @@ -635,7 +633,7 @@ static int bme680_read_press(struct bme680_data *data, s32 adc_press; /* Read and compensate temperature to get a reading of t_fine */ - ret = bme680_read_temp(data, NULL, NULL); + ret = bme680_read_temp(data, NULL); if (ret < 0) return ret; @@ -668,7 +666,7 @@ static int bme680_read_humid(struct bme680_data *data, u32 comp_humidity; /* Read and compensate temperature to get a reading of t_fine */ - ret = bme680_read_temp(data, NULL, NULL); + ret = bme680_read_temp(data, NULL); if (ret < 0) return ret; @@ -761,7 +759,7 @@ static int bme680_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_PROCESSED: switch (chan->type) { case IIO_TEMP: - return bme680_read_temp(data, val, val2); + return bme680_read_temp(data, val); case IIO_PRESSURE: return bme680_read_press(data, val, val2); case IIO_HUMIDITYRELATIVE: -- GitLab From 73f3bc6da506711302bb67572440eb84b1ec4a2c Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Wed, 6 Mar 2019 08:31:48 +0100 Subject: [PATCH 0023/1861] iio:chemical:bme680: Fix SPI read interface The SPI interface implementation was completely broken. 
When using the SPI interface, there are only 7 address bits, the upper bit is controlled by a page select register. The core needs access to both ranges, so implement register read/write for both regions. The regmap paging functionality didn't agree with a register that needs to be read and modified, so I implemented a custom paging algorithm. This fixes that the device wouldn't even probe in SPI mode. The SPI interface then isn't different from I2C, merged them into the core, and the I2C/SPI named registers are no longer needed. Implemented register value caching for the registers to reduce the I2C/SPI data transfers considerably. The calibration set reads as all zeroes until some undefined point in time, and I couldn't determine what makes it valid. The datasheet mentions these registers but does not provide any hints on when they become valid, and they aren't even enumerated in the memory map. So check the calibration and retry reading it from the device after each measurement until it provides something valid. Despite the size this is suitable for a stable backport given that it seems the SPI support never worked. Signed-off-by: Mike Looijmans Fixes: 1b3bd8592780 ("iio: chemical: Add support for Bosch BME680 sensor"); Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/bme680.h | 6 +- drivers/iio/chemical/bme680_core.c | 38 ++++++++++ drivers/iio/chemical/bme680_i2c.c | 21 ------ drivers/iio/chemical/bme680_spi.c | 115 +++++++++++++++++++---------- 4 files changed, 118 insertions(+), 62 deletions(-) diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h index 0ae89b87e2d64..4edc5d21cb9fa 100644 --- a/drivers/iio/chemical/bme680.h +++ b/drivers/iio/chemical/bme680.h @@ -2,11 +2,9 @@ #ifndef BME680_H_ #define BME680_H_ -#define BME680_REG_CHIP_I2C_ID 0xD0 -#define BME680_REG_CHIP_SPI_ID 0x50 +#define BME680_REG_CHIP_ID 0xD0 #define BME680_CHIP_ID_VAL 0x61 -#define BME680_REG_SOFT_RESET_I2C 0xE0 -#define BME680_REG_SOFT_RESET_SPI 0x60 +#define BME680_REG_SOFT_RESET 0xE0 #define BME680_CMD_SOFTRESET 0xB6 #define BME680_REG_STATUS 0x73 #define BME680_SPI_MEM_PAGE_BIT BIT(4) diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c index fefe32b5b69d8..ccde4c65ff934 100644 --- a/drivers/iio/chemical/bme680_core.c +++ b/drivers/iio/chemical/bme680_core.c @@ -63,9 +63,23 @@ struct bme680_data { s32 t_fine; }; +static const struct regmap_range bme680_volatile_ranges[] = { + regmap_reg_range(BME680_REG_MEAS_STAT_0, BME680_REG_GAS_R_LSB), + regmap_reg_range(BME680_REG_STATUS, BME680_REG_STATUS), + regmap_reg_range(BME680_T2_LSB_REG, BME680_GH3_REG), +}; + +static const struct regmap_access_table bme680_volatile_table = { + .yes_ranges = bme680_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(bme680_volatile_ranges), +}; + const struct regmap_config bme680_regmap_config = { .reg_bits = 8, .val_bits = 8, + .max_register = 0xef, + .volatile_table = &bme680_volatile_table, + .cache_type = REGCACHE_RBTREE, }; EXPORT_SYMBOL(bme680_regmap_config); @@ -316,6 +330,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data, s64 var1, var2, var3; s16 calc_temp; + /* If the calibration is invalid, attempt to reload it */ + if (!calib->par_t2) + bme680_read_calib(data, calib); + var1 = (adc_temp >> 3) - (calib->par_t1 << 1); var2 = (var1 * calib->par_t2) >> 11; var3 = ((var1 >> 1) * (var1 >> 1)) >> 12; @@ -865,8 +883,28 @@ int bme680_core_probe(struct device *dev, struct regmap *regmap, { struct iio_dev *indio_dev; struct bme680_data *data; + unsigned 
int val; int ret; + ret = regmap_write(regmap, BME680_REG_SOFT_RESET, + BME680_CMD_SOFTRESET); + if (ret < 0) { + dev_err(dev, "Failed to reset chip\n"); + return ret; + } + + ret = regmap_read(regmap, BME680_REG_CHIP_ID, &val); + if (ret < 0) { + dev_err(dev, "Error reading chip ID\n"); + return ret; + } + + if (val != BME680_CHIP_ID_VAL) { + dev_err(dev, "Wrong chip ID, got %x expected %x\n", + val, BME680_CHIP_ID_VAL); + return -ENODEV; + } + indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; diff --git a/drivers/iio/chemical/bme680_i2c.c b/drivers/iio/chemical/bme680_i2c.c index b2f805b6b36a4..de9c9e3d23ea3 100644 --- a/drivers/iio/chemical/bme680_i2c.c +++ b/drivers/iio/chemical/bme680_i2c.c @@ -23,8 +23,6 @@ static int bme680_i2c_probe(struct i2c_client *client, { struct regmap *regmap; const char *name = NULL; - unsigned int val; - int ret; regmap = devm_regmap_init_i2c(client, &bme680_regmap_config); if (IS_ERR(regmap)) { @@ -33,25 +31,6 @@ static int bme680_i2c_probe(struct i2c_client *client, return PTR_ERR(regmap); } - ret = regmap_write(regmap, BME680_REG_SOFT_RESET_I2C, - BME680_CMD_SOFTRESET); - if (ret < 0) { - dev_err(&client->dev, "Failed to reset chip\n"); - return ret; - } - - ret = regmap_read(regmap, BME680_REG_CHIP_I2C_ID, &val); - if (ret < 0) { - dev_err(&client->dev, "Error reading I2C chip ID\n"); - return ret; - } - - if (val != BME680_CHIP_ID_VAL) { - dev_err(&client->dev, "Wrong chip ID, got %x expected %x\n", - val, BME680_CHIP_ID_VAL); - return -ENODEV; - } - if (id) name = id->name; diff --git a/drivers/iio/chemical/bme680_spi.c b/drivers/iio/chemical/bme680_spi.c index d0b7bdd3f0660..3b838068a7e48 100644 --- a/drivers/iio/chemical/bme680_spi.c +++ b/drivers/iio/chemical/bme680_spi.c @@ -12,28 +12,93 @@ #include "bme680.h" +struct bme680_spi_bus_context { + struct spi_device *spi; + u8 current_page; +}; + +/* + * In SPI mode there are only 7 address bits, a "page" register determines + * which part of the 8-bit range is active. This function looks at the address + * and writes the page selection bit if needed + */ +static int bme680_regmap_spi_select_page( + struct bme680_spi_bus_context *ctx, u8 reg) +{ + struct spi_device *spi = ctx->spi; + int ret; + u8 buf[2]; + u8 page = (reg & 0x80) ? 
0 : 1; /* Page "1" is low range */ + + if (page == ctx->current_page) + return 0; + + /* + * Data sheet claims we're only allowed to change bit 4, so we must do + * a read-modify-write on each and every page select + */ + buf[0] = BME680_REG_STATUS; + ret = spi_write_then_read(spi, buf, 1, buf + 1, 1); + if (ret < 0) { + dev_err(&spi->dev, "failed to set page %u\n", page); + return ret; + } + + buf[0] = BME680_REG_STATUS; + if (page) + buf[1] |= BME680_SPI_MEM_PAGE_BIT; + else + buf[1] &= ~BME680_SPI_MEM_PAGE_BIT; + + ret = spi_write(spi, buf, 2); + if (ret < 0) { + dev_err(&spi->dev, "failed to set page %u\n", page); + return ret; + } + + ctx->current_page = page; + + return 0; +} + static int bme680_regmap_spi_write(void *context, const void *data, size_t count) { - struct spi_device *spi = context; + struct bme680_spi_bus_context *ctx = context; + struct spi_device *spi = ctx->spi; + int ret; u8 buf[2]; memcpy(buf, data, 2); + + ret = bme680_regmap_spi_select_page(ctx, buf[0]); + if (ret) + return ret; + /* * The SPI register address (= full register address without bit 7) * and the write command (bit7 = RW = '0') */ buf[0] &= ~0x80; - return spi_write_then_read(spi, buf, 2, NULL, 0); + return spi_write(spi, buf, 2); } static int bme680_regmap_spi_read(void *context, const void *reg, size_t reg_size, void *val, size_t val_size) { - struct spi_device *spi = context; + struct bme680_spi_bus_context *ctx = context; + struct spi_device *spi = ctx->spi; + int ret; + u8 addr = *(const u8 *)reg; + + ret = bme680_regmap_spi_select_page(ctx, addr); + if (ret) + return ret; - return spi_write_then_read(spi, reg, reg_size, val, val_size); + addr |= 0x80; /* bit7 = RW = '1' */ + + return spi_write_then_read(spi, &addr, 1, val, val_size); } static struct regmap_bus bme680_regmap_bus = { @@ -46,8 +111,8 @@ static struct regmap_bus bme680_regmap_bus = { static int bme680_spi_probe(struct spi_device *spi) { const struct spi_device_id *id = spi_get_device_id(spi); + struct bme680_spi_bus_context *bus_context; struct regmap *regmap; - unsigned int val; int ret; spi->bits_per_word = 8; @@ -57,45 +122,21 @@ static int bme680_spi_probe(struct spi_device *spi) return ret; } + bus_context = devm_kzalloc(&spi->dev, sizeof(*bus_context), GFP_KERNEL); + if (!bus_context) + return -ENOMEM; + + bus_context->spi = spi; + bus_context->current_page = 0xff; /* Undefined on warm boot */ + regmap = devm_regmap_init(&spi->dev, &bme680_regmap_bus, - &spi->dev, &bme680_regmap_config); + bus_context, &bme680_regmap_config); if (IS_ERR(regmap)) { dev_err(&spi->dev, "Failed to register spi regmap %d\n", (int)PTR_ERR(regmap)); return PTR_ERR(regmap); } - ret = regmap_write(regmap, BME680_REG_SOFT_RESET_SPI, - BME680_CMD_SOFTRESET); - if (ret < 0) { - dev_err(&spi->dev, "Failed to reset chip\n"); - return ret; - } - - /* after power-on reset, Page 0(0x80-0xFF) of spi_mem_page is active */ - ret = regmap_read(regmap, BME680_REG_CHIP_SPI_ID, &val); - if (ret < 0) { - dev_err(&spi->dev, "Error reading SPI chip ID\n"); - return ret; - } - - if (val != BME680_CHIP_ID_VAL) { - dev_err(&spi->dev, "Wrong chip ID, got %x expected %x\n", - val, BME680_CHIP_ID_VAL); - return -ENODEV; - } - /* - * select Page 1 of spi_mem_page to enable access to - * to registers from address 0x00 to 0x7F. 
- */ - ret = regmap_write_bits(regmap, BME680_REG_STATUS, - BME680_SPI_MEM_PAGE_BIT, - BME680_SPI_MEM_PAGE_1_VAL); - if (ret < 0) { - dev_err(&spi->dev, "failed to set page 1 of spi_mem_page\n"); - return ret; - } - return bme680_core_probe(&spi->dev, regmap, id->name); } -- GitLab From fe2d3df639a7940a125a33d6460529b9689c5406 Mon Sep 17 00:00:00 2001 From: "he, bo" Date: Wed, 6 Mar 2019 10:32:20 +0800 Subject: [PATCH 0024/1861] io: accel: kxcjk1013: restore the range after resume. On some laptops, kxcjk1013 is powered off when system enters S3. We need restore the range regiter during resume. Otherwise, the sensor doesn't work properly after S3. Signed-off-by: he, bo Signed-off-by: Chen, Hu Reviewed-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 7096e577b23f8..50f3ff386bea4 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1437,6 +1437,8 @@ static int kxcjk1013_resume(struct device *dev) mutex_lock(&data->mutex); ret = kxcjk1013_set_mode(data, OPERATION); + if (ret == 0) + ret = kxcjk1013_set_range(data, data->range); mutex_unlock(&data->mutex); return ret; -- GitLab From 7d01427aaa78fa611f84c1a05fde66a41a6598be Mon Sep 17 00:00:00 2001 From: Louis Taylor Date: Wed, 27 Feb 2019 11:07:20 +0000 Subject: [PATCH 0025/1861] HID: quirks: use correct format chars in dbg_hid When building with -Wformat, clang warns: drivers/hid/hid-quirks.c:1075:27: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] bl_entry->driver_data, bl_entry->vendor, ^~~~~~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ drivers/hid/hid-quirks.c:1076:4: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] bl_entry->product); ^~~~~~~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ drivers/hid/hid-quirks.c:1242:12: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] quirks, hdev->vendor, hdev->product); ^~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ drivers/hid/hid-quirks.c:1242:26: warning: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Wformat] quirks, hdev->vendor, hdev->product); ^~~~~~~~~~~~~ ./include/linux/hid.h:1170:48: note: expanded from macro 'dbg_hid' printk(KERN_DEBUG "%s: " format, __FILE__, ##arg); \ ~~~~~~ ^~~ 4 warnings generated. This patch fixes the format strings to use the correct format type for unsigned ints. 
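The rule in miniature (hdev->vendor and hdev->product are __u32, as the warnings above state; the vendor value is an example only):

	__u32 vendor = 0x046d;

	dbg_hid("0x%hx\n", vendor);	/* -Wformat: 'unsigned short' vs 'unsigned int' */
	dbg_hid("0x%04x\n", vendor);	/* matches the argument type, zero-padded */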
Link: https://github.com/ClangBuiltLinux/linux/issues/378 Signed-off-by: Louis Taylor Reviewed-by: Nick Desaulniers Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-quirks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 953908f2267c0..2f9a9bf552081 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -1042,7 +1042,7 @@ static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev) } if (bl_entry != NULL) - dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%hx:0x%hx\n", + dbg_hid("Found dynamic quirk 0x%lx for HID device 0x%04x:0x%04x\n", bl_entry->driver_data, bl_entry->vendor, bl_entry->product); @@ -1209,7 +1209,7 @@ static unsigned long hid_gets_squirk(const struct hid_device *hdev) quirks |= bl_entry->driver_data; if (quirks) - dbg_hid("Found squirk 0x%lx for HID device 0x%hx:0x%hx\n", + dbg_hid("Found squirk 0x%lx for HID device 0x%04x:0x%04x\n", quirks, hdev->vendor, hdev->product); return quirks; } -- GitLab From a23eab893476f67bd7572cdbf24498d647c86e48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 20:54:43 +0100 Subject: [PATCH 0026/1861] HID: hid-asus: select CONFIG_POWER_SUPPLY The newly added power supply code fails to link when the power supply core code is disabled: drivers/hid/hid-asus.o: In function `asus_battery_get_property': hid-asus.c:(.text+0x11de): undefined reference to `power_supply_get_drvdata' drivers/hid/hid-asus.o: In function `asus_probe': hid-asus.c:(.text+0x170c): undefined reference to `devm_power_supply_register' hid-asus.c:(.text+0x1734): undefined reference to `power_supply_powers' drivers/hid/hid-asus.o: In function `asus_raw_event': hid-asus.c:(.text+0x1914): undefined reference to `power_supply_changed' Select the subsystem from Kconfig as we do for other hid drivers already. Fixes: 6311d329e12a ("HID: hid-asus: Add BT keyboard dock battery monitoring support") Signed-off-by: Arnd Bergmann Signed-off-by: Benjamin Tissoires --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 6ca8d322b4872..4ca0cdfa6b33a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -150,6 +150,7 @@ config HID_ASUS tristate "Asus" depends on LEDS_CLASS depends on ASUS_WMI || ASUS_WMI=n + select POWER_SUPPLY ---help--- Support for Asus notebook built-in keyboard and touchpad via i2c, and the Asus Republic of Gamers laptop keyboard special keys. -- GitLab From 78b92f5f00cb3dbca0553f50847232eef60ccff4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Mar 2019 14:15:25 +0300 Subject: [PATCH 0027/1861] HID: quirks: Drop misused kernel-doc annotation The kernel-doc annotation is misused for hid_mouse_ignore_list. The script complains about it: drivers/hid/hid-quirks.c:894: warning: cannot understand function prototype: 'const struct hid_device_id hid_mouse_ignore_list[] = ' Drop the annotation to make script happy. 
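As an aside, a minimal illustration (an assumed example, not taken from the patch) of the convention scripts/kernel-doc enforces: only comments opened with the '/**' marker are parsed as kernel-doc and must then describe the prototype that follows; a plain '/*' comment is ignored by the script:

/**
 * example_func() - one-line description required by kernel-doc
 * @arg: description of the parameter
 */
int example_func(int arg);

/* A data table such as hid_mouse_ignore_list[] only needs a plain
 * comment; opening it with the kernel-doc marker makes the script
 * look for a prototype it cannot parse and emit the warning above.
 */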
Signed-off-by: Andy Shevchenko Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 2f9a9bf552081..1148d8c0816a3 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -855,7 +855,7 @@ static const struct hid_device_id hid_ignore_list[] = { { } }; -/** +/* * hid_mouse_ignore_list - mouse devices which should not be handled by the hid layer * * There are composite devices for which we want to ignore only a certain -- GitLab From 1cbbd85fbcdce186649ce778ff1e08e3df35d285 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 2 Mar 2019 22:23:38 +0000 Subject: [PATCH 0028/1861] HID: uclogic: remove redundant duplicated null check on ver_ptr Currently ver_ptr is being null checked twice, once before calling usb_string and once afterwards. The second null check is redundant, so remove it. Detected by CoverityScan, CID#1477308 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-uclogic-params.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 7710d9f957da5..0187c9f8fc22c 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -735,10 +735,6 @@ static int uclogic_params_huion_init(struct uclogic_params *params, goto cleanup; } rc = usb_string(udev, 201, ver_ptr, ver_len); - if (ver_ptr == NULL) { - rc = -ENOMEM; - goto cleanup; - } if (rc == -EPIPE) { *ver_ptr = '\0'; } else if (rc < 0) { -- GitLab From 42e4cedd67e4eb2abaa5e684353a55d4a01a913e Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 6 Mar 2019 11:24:45 +0100 Subject: [PATCH 0029/1861] ASoC: samsung: i2s: Fix DAPM routes for capture stream This patch sets the missing stream_name of the capture part of the DAI driver so we can also define DAPM routing properly for the capture stream. While at it, a "Playback" suffix is added to the playback stream names to clearly distinguish playback from capture. Together with the related dts patch, this fixes a NULL pointer dereference when opening the ALSA device for recording on Odroid XU3.
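For background, a condensed sketch (illustrative names, not this driver's code) of how a DAI stream gets tied to DAPM widgets: the stream_name strings are what DAPM matches, so an unset capture stream_name leaves the capture side unroutable:

static const struct snd_soc_dapm_widget example_widgets[] = {
	/* the second argument must equal the DAI's stream_name */
	SND_SOC_DAPM_AIF_IN("RXD", "Primary Playback", 0, SND_SOC_NOPM, 0, 0),
	SND_SOC_DAPM_AIF_OUT("TXD", "Primary Capture", 0, SND_SOC_NOPM, 0, 0),
};

static struct snd_soc_dai_driver example_dai = {	/* hypothetical */
	.playback = { .stream_name = "Primary Playback", },
	.capture  = { .stream_name = "Primary Capture", },	/* was unset */
};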
Fixes: 64aba9bca5bd ("ASoC: samsung: i2s: Add widgets and routes for DPCM support") Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown --- sound/soc/samsung/i2s.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 4231001226f49..ab471d550d17a 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1130,11 +1130,11 @@ static const struct snd_soc_dapm_widget samsung_i2s_widgets[] = { }; static const struct snd_soc_dapm_route samsung_i2s_dapm_routes[] = { - { "Playback Mixer", NULL, "Primary" }, - { "Playback Mixer", NULL, "Secondary" }, + { "Playback Mixer", NULL, "Primary Playback" }, + { "Playback Mixer", NULL, "Secondary Playback" }, { "Mixer DAI TX", NULL, "Playback Mixer" }, - { "Playback Mixer", NULL, "Mixer DAI RX" }, + { "Primary Capture", NULL, "Mixer DAI RX" }, }; static const struct snd_soc_component_driver samsung_i2s_component = { @@ -1155,7 +1155,8 @@ static int i2s_alloc_dais(struct samsung_i2s_priv *priv, int num_dais) { static const char *dai_names[] = { "samsung-i2s", "samsung-i2s-sec" }; - static const char *stream_names[] = { "Primary", "Secondary" }; + static const char *stream_names[] = { "Primary Playback", + "Secondary Playback" }; struct snd_soc_dai_driver *dai_drv; struct i2s_dai *dai; int i; @@ -1201,6 +1202,7 @@ static int i2s_alloc_dais(struct samsung_i2s_priv *priv, dai_drv->capture.channels_max = 2; dai_drv->capture.rates = i2s_dai_data->pcm_rates; dai_drv->capture.formats = SAMSUNG_I2S_FMTS; + dai_drv->capture.stream_name = "Primary Capture"; return 0; } -- GitLab From 570f18b6a8d1f0e60e8caf30e66161b6438dcc91 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:57 +0800 Subject: [PATCH 0030/1861] ASoC: soc-pcm: fix a codec fixup issue in TDM case On HDaudio platforms, if playback is started when capture is working, there is no audible output. This can be root-caused to the use of the rx|tx_mask to store an HDaudio stream tag. If capture is started before playback, rx_mask would be non-zero on HDaudio platforms; the channel number of playback, which is on the same codec DAI as the capture, would first be changed by soc_pcm_codec_params_fixup based on the tx_mask and then overwritten by this function based on the rx_mask. According to the author of tx|rx_mask, tx_mask is for playback and rx_mask is for capture. The stream direction is checked at all other references of tx|rx_mask in ASoC, so the missing check here is an error. This patch checks the stream direction for tx|rx_mask in the fixup function, as sketched below. This issue would affect not only HDaudio+ASoC, but also I2S codecs if the channel number based on rx_mask is not equal to the one for tx_mask. It can rarely be reproduced because most drivers in the kernel set the same channel number in tx|rx_mask, or rx_mask is zero. Tested on all platforms using stream_tag & HDaudio, and on Intel I2S platforms.
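Condensed to its core (a sketch of the rule only, not a drop-in replacement for the hunk below), the fix makes the mask choice follow the stream direction:

	/* tx_mask describes playback slots, rx_mask capture slots */
	unsigned int mask = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
			codec_dai->tx_mask : codec_dai->rx_mask;

	if (mask)	/* channel count follows the slots in use */
		soc_pcm_codec_params_fixup(&codec_params, mask);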
Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index a5b40e82dea4a..e70555a4ea069 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -954,10 +954,13 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, codec_params = *params; /* fixup params based on TDM slot masks */ - if (codec_dai->tx_mask) + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && + codec_dai->tx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->tx_mask); - if (codec_dai->rx_mask) + + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE && + codec_dai->rx_mask) soc_pcm_codec_params_fixup(&codec_params, codec_dai->rx_mask); -- GitLab From 03d0aa4d4fddce4a5d865d819a4d98bfc3d451e6 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:58 +0800 Subject: [PATCH 0031/1861] ASoC:hdac_hda:use correct format to setup hda codec The current implementation of the hdac_hda codec results in zero-valued samples on capture and noise with headset playback when SOF is used on platforms with an on-board HDaudio codec. This is root-caused to SOF using be_hw_params_fixup, and the prepare() call using invalid runtime fields to determine the format. This patch moves the format handling to the hw_params() callback, as done already for hdac_hdmi, to make sure the fixed-up information is taken into account but keeps the codec initialization in prepare() as the stream_tag is only available at that time. Moving everything in the prepare() callback is possible but the code is less elegant so this two-step solution was chosen. The solution was tested with the SST driver with no regressions, and all the issues with SOF playback and capture are solved. 
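The resulting split, reduced to a sketch (hypothetical example_* names; the real hunks follow below): hw_params() sees the fixed-up parameters and caches the computed format value, while prepare() pairs that cached value with the stream tag that only exists at prepare time:

struct example_priv {			/* hypothetical driver state */
	unsigned int format_val[2];
	unsigned int stream_tag[2];
	unsigned int maxbps;
};

static int example_hw_params(struct snd_pcm_substream *ss,
			     struct snd_pcm_hw_params *params,
			     struct snd_soc_dai *dai)
{
	struct example_priv *p = snd_soc_dai_get_drvdata(dai);

	/* params already reflects any be_hw_params_fixup */
	p->format_val[ss->stream] =
		snd_hdac_calc_stream_format(params_rate(params),
					    params_channels(params),
					    params_format(params),
					    p->maxbps, 0);
	return p->format_val[ss->stream] ? 0 : -EINVAL;
}

static int example_prepare(struct snd_pcm_substream *ss,
			   struct snd_soc_dai *dai)
{
	struct example_priv *p = snd_soc_dai_get_drvdata(dai);

	/* the stream tag is only known here; use the cached format.
	 * example_codec_prepare() stands in for the codec call.
	 */
	return example_codec_prepare(p->stream_tag[ss->stream],
				     p->format_val[ss->stream]);
}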
Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hda.c | 53 +++++++++++++++++++++++++++---------- sound/soc/codecs/hdac_hda.h | 1 + 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c index ffecdaaa8cf2b..f889d94c8e3cf 100644 --- a/sound/soc/codecs/hdac_hda.c +++ b/sound/soc/codecs/hdac_hda.c @@ -38,6 +38,9 @@ static void hdac_hda_dai_close(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); +static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai); static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai); static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, @@ -50,6 +53,7 @@ static const struct snd_soc_dai_ops hdac_hda_dai_ops = { .startup = hdac_hda_dai_open, .shutdown = hdac_hda_dai_close, .prepare = hdac_hda_dai_prepare, + .hw_params = hdac_hda_dai_hw_params, .hw_free = hdac_hda_dai_hw_free, .set_tdm_slot = hdac_hda_dai_set_tdm_slot, }; @@ -139,6 +143,39 @@ static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, return 0; } +static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct hdac_hda_priv *hda_pvt; + unsigned int format_val; + unsigned int maxbps; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + maxbps = dai->driver->playback.sig_bits; + else + maxbps = dai->driver->capture.sig_bits; + + hda_pvt = snd_soc_component_get_drvdata(component); + format_val = snd_hdac_calc_stream_format(params_rate(params), + params_channels(params), + params_format(params), + maxbps, + 0); + if (!format_val) { + dev_err(dai->dev, + "invalid format_val, rate=%d, ch=%d, format=%d, maxbps=%d\n", + params_rate(params), params_channels(params), + params_format(params), maxbps); + + return -EINVAL; + } + + hda_pvt->pcm[dai->id].format_val[substream->stream] = format_val; + return 0; +} + static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -162,10 +199,9 @@ static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; + struct hda_pcm_stream *hda_stream; struct hdac_hda_priv *hda_pvt; - struct snd_pcm_runtime *runtime = substream->runtime; struct hdac_device *hdev; - struct hda_pcm_stream *hda_stream; unsigned int format_val; struct hda_pcm *pcm; unsigned int stream; @@ -179,19 +215,8 @@ static int hdac_hda_dai_prepare(struct snd_pcm_substream *substream, hda_stream = &pcm->stream[substream->stream]; - format_val = snd_hdac_calc_stream_format(runtime->rate, - runtime->channels, - runtime->format, - hda_stream->maxbps, - 0); - if (!format_val) { - dev_err(&hdev->dev, - "invalid format_val, rate=%d, ch=%d, format=%d\n", - runtime->rate, runtime->channels, runtime->format); - return -EINVAL; - } - stream = hda_pvt->pcm[dai->id].stream_tag[substream->stream]; + format_val = hda_pvt->pcm[dai->id].format_val[substream->stream]; ret = snd_hda_codec_prepare(&hda_pvt->codec, hda_stream, stream, format_val, substream); diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index e444ef5933606..6b1bd4f428e70 100644 --- a/sound/soc/codecs/hdac_hda.h +++ 
b/sound/soc/codecs/hdac_hda.h @@ -8,6 +8,7 @@ struct hdac_hda_pcm { int stream_tag[2]; + unsigned int format_val[2]; }; struct hdac_hda_priv { -- GitLab From c899df3e9b0bf7b76e642aed1a214582ea7012d5 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 8 Mar 2019 16:38:59 +0800 Subject: [PATCH 0032/1861] ASoC: intel: skl: fix a simultaneous playback & capture issue on hda platform If playback and capture are enabled concurrently, when the capture stops the output becomes inaudible. The playback application will become stuck and underrun after a timeout. This is caused by mistaken use of the stream_id, which should only be set for playback and not for capture. Tested on Apollolake and Kabylake with the SST driver. Signed-off-by: Rander Wang Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index a4284778f117d..bb463ee6ff842 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -181,6 +181,7 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) struct hdac_stream *hstream; struct hdac_ext_stream *stream; struct hdac_ext_link *link; + unsigned char stream_tag; hstream = snd_hdac_get_stream(bus, params->stream, params->link_dma_id + 1); @@ -199,10 +200,13 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) snd_hdac_ext_link_stream_setup(stream, format_val); - list_for_each_entry(link, &bus->hlink_list, list) { - if (link->index == params->link_index) - snd_hdac_ext_link_set_stream_id(link, - hstream->stream_tag); + stream_tag = hstream->stream_tag; + if (stream->hstream.direction == SNDRV_PCM_STREAM_PLAYBACK) { + list_for_each_entry(link, &bus->hlink_list, list) { + if (link->index == params->link_index) + snd_hdac_ext_link_set_stream_id(link, + stream_tag); + } } stream->link_prepared = 1; @@ -645,6 +649,7 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, struct hdac_ext_stream *link_dev = snd_soc_dai_get_dma_data(dai, substream); struct hdac_ext_link *link; + unsigned char stream_tag; dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name); @@ -654,7 +659,11 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, if (!link) return -EINVAL; - snd_hdac_ext_link_clear_stream_id(link, hdac_stream(link_dev)->stream_tag); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + stream_tag = hdac_stream(link_dev)->stream_tag; + snd_hdac_ext_link_clear_stream_id(link, stream_tag); + } + snd_hdac_ext_stream_release(link_dev, HDAC_EXT_STREAM_TYPE_LINK); return 0; } -- GitLab From a9764869779081e8bf24da07ac040e8f3efcf13a Mon Sep 17 00:00:00 2001 From: KaiChieh Chuang Date: Fri, 8 Mar 2019 13:05:53 +0800 Subject: [PATCH 0033/1861] ASoC: dpcm: prevent snd_soc_dpcm use after free The dpcm obtained from fe_clients/be_clients may be freed before use. Add a spinlock at the snd_soc_card level to protect the dpcm instances. The lock may be used in atomic context, so a spinlock is used; the IRQ-saving variant, since the lock may also be taken in interrupts. A possible race condition exists between void dpcm_be_disconnect( ... list_del(&dpcm->list_be); list_del(&dpcm->list_fe); kfree(dpcm); ...
and for_each_dpcm_fe()/for_each_dpcm_be*(). Race condition example: Thread 1: snd_soc_dapm_mixer_update_power() -> soc_dpcm_runtime_update() -> dpcm_be_disconnect() -> kfree(dpcm); Thread 2: dpcm_fe_dai_trigger() -> dpcm_be_dai_trigger() -> snd_soc_dpcm_can_be_free_stop() -> if (dpcm->fe == fe) Exception scenario: two FEs link to the same BE (FE1 -> BE, FE2 -> BE). Thread 1: switch of the mixer between FE2 -> BE. Thread 2: pcm_stop on FE1. Exception: Unable to handle kernel paging request at virtual address dead0000000000e0 pc=<> [] dpcm_be_dai_trigger+0x29c/0x47c sound/soc/soc-pcm.c:3226 if (dpcm->fe == fe) lr=<> [] dpcm_fe_dai_do_trigger+0x94/0x26c Backtrace: [] notify_die+0x68/0xb8 [] die+0x118/0x2a8 [] __do_kernel_fault+0x13c/0x14c [] do_translation_fault+0x64/0xa0 [] do_mem_abort+0x4c/0xd0 [] el1_da+0x24/0x40 [] dpcm_be_dai_trigger+0x29c/0x47c [] dpcm_fe_dai_do_trigger+0x94/0x26c [] dpcm_fe_dai_trigger+0x3c/0x44 [] snd_pcm_do_stop+0x50/0x5c [] snd_pcm_action+0xb4/0x13c [] snd_pcm_drop+0xa0/0x128 [] snd_pcm_common_ioctl+0x9d8/0x30f0 [] snd_pcm_ioctl_compat+0x29c/0x2f14 [] compat_SyS_ioctl+0x128/0x244 [] el0_svc_naked+0x34/0x38 [] 0xffffffffffffffff Signed-off-by: KaiChieh Chuang Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 1 + sound/soc/soc-pcm.c | 40 +++++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index eb7db605955b8..1e2be35ed36f2 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1083,6 +1083,8 @@ struct snd_soc_card { struct mutex mutex; struct mutex dapm_mutex; + spinlock_t dpcm_lock; + bool instantiated; bool topology_shortname_created; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 5a5764dba1479..d887576597290 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2820,6 +2820,7 @@ int snd_soc_register_card(struct snd_soc_card *card) card->instantiated = 0; mutex_init(&card->mutex); mutex_init(&card->dapm_mutex); + spin_lock_init(&card->dpcm_lock); return snd_soc_bind_card(card); } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index e70555a4ea069..90504c1499314 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1216,6 +1216,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; /* only add new dpcms */ for_each_dpcm_be(fe, stream, dpcm) { @@ -1231,8 +1232,10 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe, dpcm->fe = fe; be->dpcm[stream].runtime = fe->dpcm[stream].runtime; dpcm->state = SND_SOC_DPCM_LINK_STATE_NEW; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_add(&dpcm->list_be, &fe->dpcm[stream].be_clients); list_add(&dpcm->list_fe, &be->dpcm[stream].fe_clients); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); dev_dbg(fe->dev, "connected new DPCM %s path %s %s %s\n", stream ?
"capture" : "playback", fe->dai_link->name, @@ -1278,6 +1281,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; + unsigned long flags; for_each_dpcm_be_safe(fe, stream, dpcm, d) { dev_dbg(fe->dev, "ASoC: BE %s disconnect check for %s\n", @@ -1297,8 +1301,10 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) #ifdef CONFIG_DEBUG_FS debugfs_remove(dpcm->debugfs_state); #endif + spin_lock_irqsave(&fe->card->dpcm_lock, flags); list_del(&dpcm->list_be); list_del(&dpcm->list_fe); + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); kfree(dpcm); } } @@ -1550,10 +1556,13 @@ int dpcm_process_paths(struct snd_soc_pcm_runtime *fe, void dpcm_clear_pending_state(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) dpcm->be->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); } static void dpcm_be_dai_startup_unwind(struct snd_soc_pcm_runtime *fe, @@ -2574,6 +2583,7 @@ static int dpcm_run_update_startup(struct snd_soc_pcm_runtime *fe, int stream) struct snd_soc_dpcm *dpcm; enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream]; int ret; + unsigned long flags; dev_dbg(fe->dev, "ASoC: runtime %s open on FE %s\n", stream ? "capture" : "playback", fe->dai_link->name); @@ -2643,11 +2653,13 @@ close: dpcm_be_dai_shutdown(fe, stream); disconnect: /* disconnect any non started BEs */ + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; if (be->dpcm[stream].state != SND_SOC_DPCM_STATE_START) dpcm->state = SND_SOC_DPCM_LINK_STATE_FREE; } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); return ret; } @@ -3223,7 +3235,10 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; int state; + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -3232,12 +3247,15 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe, state = dpcm->fe->dpcm[stream].state; if (state == SND_SOC_DPCM_STATE_START || state == SND_SOC_DPCM_STATE_PAUSED || - state == SND_SOC_DPCM_STATE_SUSPEND) - return 0; + state == SND_SOC_DPCM_STATE_SUSPEND) { + ret = 0; + break; + } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to free/stop this BE DAI */ - return 1; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_free_stop); @@ -3250,7 +3268,10 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, { struct snd_soc_dpcm *dpcm; int state; + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) @@ -3260,12 +3281,15 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe, if (state == SND_SOC_DPCM_STATE_START || state == SND_SOC_DPCM_STATE_PAUSED || state == SND_SOC_DPCM_STATE_SUSPEND || - state == SND_SOC_DPCM_STATE_PREPARE) - return 0; + state == SND_SOC_DPCM_STATE_PREPARE) { + ret = 0; + break; + } } + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); /* it's safe to change hw_params */ - return 1; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_params); @@ -3304,6 +3328,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct 
snd_pcm_hw_params *params = &fe->dpcm[stream].hw_params; struct snd_soc_dpcm *dpcm; ssize_t offset = 0; + unsigned long flags; /* FE state */ offset += snprintf(buf + offset, size - offset, @@ -3331,6 +3356,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, goto out; } + spin_lock_irqsave(&fe->card->dpcm_lock, flags); for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; @@ -3351,7 +3377,7 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_channels(params), params_rate(params)); } - + spin_unlock_irqrestore(&fe->card->dpcm_lock, flags); out: return offset; } -- GitLab From 844a4a362dbec166b44d6b9b3dd45b08cb273703 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Mon, 11 Mar 2019 09:36:45 +0800 Subject: [PATCH 0034/1861] ASoC: nau8824: fix the issue of the widget with prefix name The driver has two issues when the machine adds a prefix name for the codec: (1) the stream name of the DAI can't find the AIF widgets, and (2) the driver can't enable/disable the MICBIAS and SAR widgets. The patch fixes these issues caused by the added prefix name. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 46 +++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 87ed3dc496dc2..5ab05e75edeac 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -681,8 +681,8 @@ static const struct snd_soc_dapm_widget nau8824_dapm_widgets[] = { SND_SOC_DAPM_ADC("ADCR", NULL, NAU8824_REG_ANALOG_ADC_2, NAU8824_ADCR_EN_SFT, 0), - SND_SOC_DAPM_AIF_OUT("AIFTX", "HiFi Capture", 0, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_AIF_IN("AIFRX", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("AIFRX", "Playback", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_DAC("DACL", NULL, NAU8824_REG_RDAC, NAU8824_DACL_EN_SFT, 0), @@ -831,6 +831,36 @@ static void nau8824_int_status_clear_all(struct regmap *regmap) } } +static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + const char *prefix = dapm->component->name_prefix; + char prefixed_pin[80]; + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + snd_soc_dapm_disable_pin(dapm, prefixed_pin); + } else { + snd_soc_dapm_disable_pin(dapm, pin); + } +} + +static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + const char *prefix = dapm->component->name_prefix; + char prefixed_pin[80]; + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + snd_soc_dapm_force_enable_pin(dapm, prefixed_pin); + } else { + snd_soc_dapm_force_enable_pin(dapm, pin); + } +} + static void nau8824_eject_jack(struct nau8824 *nau8824) { struct snd_soc_dapm_context *dapm = nau8824->dapm; @@ -839,8 +869,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824) /* Clear all interruption status */ nau8824_int_status_clear_all(regmap); - snd_soc_dapm_disable_pin(dapm, "SAR"); - snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + nau8824_dapm_disable_pin(nau8824, "SAR"); + nau8824_dapm_disable_pin(nau8824, "MICBIAS"); snd_soc_dapm_sync(dapm); /* Enable the insertion interruption, disable the ejection @@ -870,8 +900,8 @@ static void nau8824_jdet_work(struct work_struct *work) struct regmap *regmap = nau8824->regmap;
int adc_value, event = 0, event_mask = 0; - snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); - snd_soc_dapm_force_enable_pin(dapm, "SAR"); + nau8824_dapm_enable_pin(nau8824, "MICBIAS"); + nau8824_dapm_enable_pin(nau8824, "SAR"); snd_soc_dapm_sync(dapm); msleep(100); @@ -882,8 +912,8 @@ if (adc_value < HEADSET_SARADC_THD) { event |= SND_JACK_HEADPHONE; - snd_soc_dapm_disable_pin(dapm, "SAR"); - snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + nau8824_dapm_disable_pin(nau8824, "SAR"); + nau8824_dapm_disable_pin(nau8824, "MICBIAS"); snd_soc_dapm_sync(dapm); } else { event |= SND_JACK_HEADSET; -- GitLab From a39fe6e2061615496c12825d6d249fedf1974f8a Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 11 Mar 2019 16:39:28 +0100 Subject: [PATCH 0035/1861] ASoC: stm32: i2s: fix registers declaration in regmap - Declare SR as volatile, as it is changed by hardware. - Remove TXDR from the readable and volatile register lists, as it is intended for write accesses only. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_i2s.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 47c334de6b096..8968458eec62d 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -281,7 +281,6 @@ static bool stm32_i2s_readable_reg(struct device *dev, unsigned int reg) case STM32_I2S_CFG2_REG: case STM32_I2S_IER_REG: case STM32_I2S_SR_REG: - case STM32_I2S_TXDR_REG: case STM32_I2S_RXDR_REG: case STM32_I2S_CGFR_REG: return true; @@ -293,7 +292,7 @@ static bool stm32_i2s_readable_reg(struct device *dev, unsigned int reg) static bool stm32_i2s_volatile_reg(struct device *dev, unsigned int reg) { switch (reg) { - case STM32_I2S_TXDR_REG: + case STM32_I2S_SR_REG: case STM32_I2S_RXDR_REG: return true; default: -- GitLab From ba164a49f8f7390b036713bf8a70a150a938c670 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Mar 2019 15:15:53 +0900 Subject: [PATCH 0036/1861] ASoC: rsnd: src: Avoid a potential deadlock lockdep warns us that priv->lock and k->k_lock can cause a deadlock: after k->k_lock is acquired, the process can be interrupted by SRC, while in another routine, the SRC .init callback, k->k_lock is acquired with priv->lock held. This patch avoids the potential deadlock by not calling soc_device_match() in the SRC .init callback; instead, it adds new SoC fields in priv->flags to differentiate SoCs.
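Spelled out (an illustration of the encoding, using the flag values visible in the hunks below, with the generation-mask width assumed), the generation lives in the low bits and the SoC variant in the next field, so one masked compare replaces the soc_device_match() lookup:

	/* flags encoding (illustrative):
	 *   low bits     generation: RSND_GEN1/2/3
	 *   bits 4..11   SoC family: RSND_SOC_E for E1/E2/E3
	 */
	if ((priv->flags & (RSND_GEN_MASK | RSND_SOC_MASK)) ==
	    (RSND_GEN3 | RSND_SOC_E)) {
		/* E3-specific overwrite, now safe in atomic context:
		 * a plain flag test, no device-tree lookup.
		 */
	}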
Fixes: linux-next commit 7674bec4fc09 ("ASoC: rsnd: update BSDSR/BSDISR handling") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 2 ++ sound/soc/sh/rcar/rsnd.h | 5 +++++ sound/soc/sh/rcar/src.c | 9 +-------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 9834474684b1b..8d3758f862f10 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -110,6 +110,8 @@ static const struct of_device_id rsnd_of_match[] = { { .compatible = "renesas,rcar_sound-gen1", .data = (void *)RSND_GEN1 }, { .compatible = "renesas,rcar_sound-gen2", .data = (void *)RSND_GEN2 }, { .compatible = "renesas,rcar_sound-gen3", .data = (void *)RSND_GEN3 }, + /* Special Handling */ + { .compatible = "renesas,rcar_sound-r8a77990", .data = (void *)(RSND_GEN3 | RSND_SOC_E) }, {}, }; MODULE_DEVICE_TABLE(of, rsnd_of_match); diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 90625c57847b5..0e6ef4e184002 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -607,6 +607,8 @@ struct rsnd_priv { #define RSND_GEN1 (1 << 0) #define RSND_GEN2 (2 << 0) #define RSND_GEN3 (3 << 0) +#define RSND_SOC_MASK (0xFF << 4) +#define RSND_SOC_E (1 << 4) /* E1/E2/E3 */ /* * below value will be filled on rsnd_gen_probe() @@ -679,6 +681,9 @@ struct rsnd_priv { #define rsnd_is_gen1(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN1) #define rsnd_is_gen2(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN2) #define rsnd_is_gen3(priv) (((priv)->flags & RSND_GEN_MASK) == RSND_GEN3) +#define rsnd_is_e3(priv) (((priv)->flags & \ + (RSND_GEN_MASK | RSND_SOC_MASK)) == \ + (RSND_GEN3 | RSND_SOC_E)) #define rsnd_flags_has(p, f) ((p)->flags & (f)) #define rsnd_flags_set(p, f) ((p)->flags |= (f)) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index db81e066b92ef..45096a66c0743 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -14,7 +14,6 @@ */ #include "rsnd.h" -#include #define SRC_NAME "src" @@ -189,18 +188,12 @@ const static u32 chan222222[] = { 0x00000006, /* 1 to 2 */ }; -static const struct soc_device_attribute ov_soc[] = { - { .soc_id = "r8a77990" }, /* E3 */ - { /* sentinel */ } -}; - static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, struct rsnd_mod *mod) { struct rsnd_priv *priv = rsnd_mod_to_priv(mod); struct device *dev = rsnd_priv_to_dev(priv); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - const struct soc_device_attribute *soc = soc_device_match(ov_soc); int is_play = rsnd_io_is_play(io); int use_src = 0; u32 fin, fout; @@ -307,7 +300,7 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, /* * E3 need to overwrite */ - if (soc) + if (rsnd_is_e3(priv)) switch (rsnd_mod_id(mod)) { case 0: case 4: -- GitLab From 399706df420ea8bc8b31283a919e6475f737d0ea Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Mar 2019 15:15:54 +0900 Subject: [PATCH 0037/1861] ASoC: rsnd: src: fix compiler warnings compiler complains about following declarations sound/soc/sh/rcar/src.c:174:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 bsdsr_table_pattern1[] = { ^~~~~ sound/soc/sh/rcar/src.c:183:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 bsdsr_table_pattern2[] = { ^~~~~ sound/soc/sh/rcar/src.c:192:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 
bsisr_table[] = { ^~~~~ sound/soc/sh/rcar/src.c:201:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 chan288888[] = { ^~~~~ sound/soc/sh/rcar/src.c:210:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 chan244888[] = { ^~~~~ sound/soc/sh/rcar/src.c:219:1: warning: 'static' is not at beginning of declaration [-Wold-style-declaration] const static u32 chan222222[] = { ^~~~~ This patch moves the 'static' keyword to the front of the declaration to fix the compiler warnings. Fixes: linux-next commit 7674bec4fc09 ("ASoC: rsnd: update BSDSR/BSDISR handling") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/src.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 45096a66c0743..585ffba0244b9 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -134,7 +134,7 @@ unsigned int rsnd_src_get_rate(struct rsnd_priv *priv, return rate; } -const static u32 bsdsr_table_pattern1[] = { +static const u32 bsdsr_table_pattern1[] = { 0x01800000, /* 6 - 1/6 */ 0x01000000, /* 6 - 1/4 */ 0x00c00000, /* 6 - 1/3 */ @@ -143,7 +143,7 @@ const static u32 bsdsr_table_pattern1[] = { 0x00400000, /* 6 - 1 */ }; -const static u32 bsdsr_table_pattern2[] = { +static const u32 bsdsr_table_pattern2[] = { 0x02400000, /* 6 - 1/6 */ 0x01800000, /* 6 - 1/4 */ 0x01200000, /* 6 - 1/3 */ @@ -152,7 +152,7 @@ const static u32 bsdsr_table_pattern2[] = { 0x00600000, /* 6 - 1 */ }; -const static u32 bsisr_table[] = { +static const u32 bsisr_table[] = { 0x00100060, /* 6 - 1/6 */ 0x00100040, /* 6 - 1/4 */ 0x00100030, /* 6 - 1/3 */ @@ -161,7 +161,7 @@ const static u32 bsisr_table[] = { 0x00100020, /* 6 - 1 */ }; -const static u32 chan288888[] = { +static const u32 chan288888[] = { 0x00000006, /* 1 to 2 */ 0x000001fe, /* 1 to 8 */ 0x000001fe, /* 1 to 8 */ @@ -170,7 +170,7 @@ const static u32 chan288888[] = { 0x000001fe, /* 1 to 8 */ }; -const static u32 chan244888[] = { +static const u32 chan244888[] = { 0x00000006, /* 1 to 2 */ 0x0000001e, /* 1 to 4 */ 0x0000001e, /* 1 to 4 */ @@ -179,7 +179,7 @@ const static u32 chan244888[] = { 0x000001fe, /* 1 to 8 */ }; -const static u32 chan222222[] = { +static const u32 chan222222[] = { 0x00000006, /* 1 to 2 */ 0x00000006, /* 1 to 2 */ 0x00000006, /* 1 to 2 */ -- GitLab From 54d1cf78b0f4ba348a7c7fb8b7d0708d71b6cc8a Mon Sep 17 00:00:00 2001 From: John Hsu Date: Wed, 13 Mar 2019 16:23:44 +0800 Subject: [PATCH 0038/1861] ASoC: nau8810: fix the issue of widget with prefixed name The driver changes the stream names of the DAC and ADC to avoid the issue of widgets with a prefixed name. When the machine adds a prefixed name for the codec, the stream name of the DAI may not match the widgets.
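In condensed form (illustration only; the register macros are placeholders), the stream names are made to mirror the DAI driver's stream_name fields, which is the string DAPM matches when wiring the DAI to the codec widgets:

	SND_SOC_DAPM_DAC("DAC", "Playback", EXAMPLE_REG_POWER, 0, 0),
	SND_SOC_DAPM_ADC("ADC", "Capture", EXAMPLE_REG_POWER, 1, 0),

	/* must line up with (hypothetical DAI driver fields):
	 *   dai_drv->playback.stream_name = "Playback";
	 *   dai_drv->capture.stream_name  = "Capture";
	 */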
Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8810.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8810.c b/sound/soc/codecs/nau8810.c index bfd74b86c9d2f..645aa07941237 100644 --- a/sound/soc/codecs/nau8810.c +++ b/sound/soc/codecs/nau8810.c @@ -411,9 +411,9 @@ static const struct snd_soc_dapm_widget nau8810_dapm_widgets[] = { SND_SOC_DAPM_MIXER("Mono Mixer", NAU8810_REG_POWER3, NAU8810_MOUTMX_EN_SFT, 0, &nau8810_mono_mixer_controls[0], ARRAY_SIZE(nau8810_mono_mixer_controls)), - SND_SOC_DAPM_DAC("DAC", "HiFi Playback", NAU8810_REG_POWER3, + SND_SOC_DAPM_DAC("DAC", "Playback", NAU8810_REG_POWER3, NAU8810_DAC_EN_SFT, 0), - SND_SOC_DAPM_ADC("ADC", "HiFi Capture", NAU8810_REG_POWER2, + SND_SOC_DAPM_ADC("ADC", "Capture", NAU8810_REG_POWER2, NAU8810_ADC_EN_SFT, 0), SND_SOC_DAPM_PGA("SpkN Out", NAU8810_REG_POWER3, NAU8810_NSPK_EN_SFT, 0, NULL, 0), -- GitLab From 2b13bee3884926cba22061efa75bd315e871de24 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 12 Mar 2019 18:40:06 +0100 Subject: [PATCH 0039/1861] ASoC: samsung: odroid: Fix clock configuration for 44100 sample rate After commit fbeec965b8d1c ("ASoC: samsung: odroid: Fix 32000 sample rate handling") the audio root clock frequency is configured improperly for 44100 sample rate. Due to clock rate rounding it's 20070401 Hz instead of 22579000 Hz. This results in a too low value of the PSR clock divider in the CPU DAI driver and too fast actual sample rate for fs=44100. E.g. 1 kHz tone has actual 1780 Hz frequency (1 kHz * 20070401/22579000 * 2). Fix this by increasing the correction passed to clk_set_rate() to take into account inaccuracy of the EPLL frequency properly. Fixes: fbeec965b8d1c ("ASoC: samsung: odroid: Fix 32000 sample rate handling") Reported-by: JaeChul Lee Signed-off-by: Sylwester Nawrocki Signed-off-by: Mark Brown --- sound/soc/samsung/odroid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index 694512f980fdc..1dc54c4206f0a 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -91,11 +91,11 @@ static int odroid_card_be_hw_params(struct snd_pcm_substream *substream, return ret; /* - * We add 1 to the rclk_freq value in order to avoid too low clock + * We add 2 to the rclk_freq value in order to avoid too low clock * frequency values due to the EPLL output frequency not being exact * multiple of the audio sampling rate. */ - rclk_freq = params_rate(params) * rfs + 1; + rclk_freq = params_rate(params) * rfs + 2; ret = clk_set_rate(priv->sclk_i2s, rclk_freq); if (ret < 0) -- GitLab From 11cf9d863dcb583345723b0ed72173348761e9c0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 9 Mar 2019 17:37:21 +0530 Subject: [PATCH 0040/1861] fs/dax: Deposit pagetable even when installing zero page Architectures like ppc64 use the deposited page table to store hardware page table slot information. Make sure we deposit a page table when using zero page at the pmd level for hash. Without this we hit Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000082a74 Oops: Kernel access of bad area, sig: 11 [#1] .... 
NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0 LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0 Call Trace: hash_page_mm+0x43c/0x740 do_hash_page+0x2c/0x3c copy_from_iter_flushcache+0xa4/0x4a0 pmem_copy_from_iter+0x2c/0x50 [nd_pmem] dax_copy_from_iter+0x40/0x70 dax_iomap_actor+0x134/0x360 iomap_apply+0xfc/0x1b0 dax_iomap_rw+0xac/0x130 ext4_file_write_iter+0x254/0x460 [ext4] __vfs_write+0x120/0x1e0 vfs_write+0xd8/0x220 SyS_write+0x6c/0x110 system_call+0x3c/0x130 Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions") Cc: Reviewed-by: Jan Kara Signed-off-by: Aneesh Kumar K.V Signed-off-by: Dan Williams --- fs/dax.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index ca0671d55aa69..e5e54da1715f6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "internal.h" #define CREATE_TRACE_POINTS @@ -1407,7 +1408,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; + struct vm_area_struct *vma = vmf->vma; struct inode *inode = mapping->host; + pgtable_t pgtable = NULL; struct page *zero_page; spinlock_t *ptl; pmd_t pmd_entry; @@ -1422,12 +1425,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, DAX_PMD | DAX_ZERO_PAGE, false); + if (arch_needs_pgtable_deposit()) { + pgtable = pte_alloc_one(vma->vm_mm); + if (!pgtable) + return VM_FAULT_OOM; + } + ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { spin_unlock(ptl); goto fallback; } + if (pgtable) { + pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); + mm_inc_nr_ptes(vma->vm_mm); + } pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot); pmd_entry = pmd_mkhuge(pmd_entry); set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); @@ -1436,6 +1449,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, return VM_FAULT_NOPAGE; fallback: + if (pgtable) + pte_free(vma->vm_mm, pgtable); trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry); return VM_FAULT_FALLBACK; } -- GitLab From 6ee02a54ef990a71bf542b6f0a4e3321de9d9c66 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Thu, 14 Mar 2019 14:59:42 +0800 Subject: [PATCH 0041/1861] xfrm6_tunnel: Fix potential panic when unloading xfrm6_tunnel module When unloading the xfrm6_tunnel module, xfrm6_tunnel_fini directly frees xfrm6_tunnel_spi_kmem. Someone may still have obtained an xfrm6_tunnel_spi, so we need to wait for them before destroying the cache. Fixes: 91cc3bb0b04ff ("xfrm6_tunnel: RCU conversion") Signed-off-by: Su Yanjun Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_tunnel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index bc65db782bfb1..12cb3aa990af4 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has gotten the xfrm6_tunnel_spi. + * So need to wait it.
+ */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } -- GitLab From 06003531502d06bc89d32528f6ec96bf978790f9 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dagenais Date: Wed, 6 Mar 2019 15:56:06 -0500 Subject: [PATCH 0042/1861] iio: dac: mcp4725: add missing powerdown bits in store eeprom When issuing the write DAC register and write eeprom command, the two powerdown bits (PD0 and PD1) are assumed by the chip to be present in the bytes sent. Leaving them at 0 implies "powerdown disabled", which is a different state than the current one. By adding the current state of the powerdown to the i2c write, the chip will correctly power on exactly as it was at the moment of the store_eeprom call. This is documented in MCP4725's datasheet, FIGURE 6-2: "Write Commands for DAC Input Register and EEPROM" and MCP4726's datasheet, FIGURE 6-3: "Write All Memory Command". Signed-off-by: Jean-Francois Dagenais Acked-by: Peter Meerwald-Stadler Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/mcp4725.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index 6d71fd905e29d..c701a45469f64 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -92,6 +92,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev, inoutbuf[0] = 0x60; /* write EEPROM */ inoutbuf[0] |= data->ref_mode << 3; + inoutbuf[0] |= data->powerdown ? ((data->powerdown_mode + 1) << 1) : 0; inoutbuf[1] = data->dac_value >> 4; inoutbuf[2] = (data->dac_value & 0xf) << 4; -- GitLab From 62039b6aef63380ba7a37c113bbaeee8a55c5342 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:24 -0400 Subject: [PATCH 0043/1861] iio: adc: xilinx: fix potential use-after-free on remove When cancel_delayed_work() returns, the delayed work may still be running. This means that the core could potentially free the private structure (struct xadc) while the delayed work is still using it. This is a potential use-after-free. Fix by calling cancel_delayed_work_sync(), which waits for any residual work to finish before returning. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index b13c61539d46b..ef3afaeed1948 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1322,7 +1322,7 @@ static int xadc_remove(struct platform_device *pdev) } free_irq(xadc->irq, indio_dev); clk_disable_unprepare(xadc->clk); - cancel_delayed_work(&xadc->zynq_unmask_work); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); kfree(xadc->data); kfree(indio_dev->channels); -- GitLab From 862e4644fd2d7df8998edc65e0963ea2f567bde9 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:25 -0400 Subject: [PATCH 0044/1861] iio: adc: xilinx: fix potential use-after-free on probe If probe errors out after request_irq(), its error path does not explicitly cancel the delayed work, which may have been scheduled by the interrupt handler. This means the delayed work may still be running when the core frees the private structure (struct xadc). This is a potential use-after-free. Fix by inserting cancel_delayed_work_sync() in the probe error path.
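The shape of the fix, as a standalone sketch (assumed example_* names throughout): once request_irq() has succeeded, every later error path must also synchronously cancel any work the handler may have scheduled, mirroring what remove() does:

struct example_priv {			/* hypothetical driver state */
	int irq;
	struct delayed_work unmask_work;
};

static int example_probe(struct example_priv *p)
{
	int ret;

	ret = request_irq(p->irq, example_isr, 0, "example", p);
	if (ret)
		return ret;

	ret = example_register(p);	/* any later step that can fail */
	if (ret)
		goto err_free_irq;

	return 0;

err_free_irq:
	free_irq(p->irq, p);
	/* the ISR may already have queued the delayed work; wait for it */
	cancel_delayed_work_sync(&p->unmask_work);
	return ret;
}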
Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index ef3afaeed1948..5a1b63f9d0412 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1292,6 +1292,7 @@ static int xadc_probe(struct platform_device *pdev) err_free_irq: free_irq(xadc->irq, indio_dev); + cancel_delayed_work_sync(&xadc->zynq_unmask_work); err_clk_disable_unprepare: clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger: -- GitLab From 2e4b88f73966adead360e47621df0183586fac32 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Sun, 10 Mar 2019 14:58:26 -0400 Subject: [PATCH 0045/1861] iio: adc: xilinx: prevent touching unclocked h/w on remove In remove, the clock is disabled before canceling the delayed work. This means that the delayed work may be touching unclocked hardware. Fix by disabling the clock after the delayed work is fully canceled. This is consistent with the probe error path order. Signed-off-by: Sven Van Asbroeck Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 5a1b63f9d0412..6401ca7a9a207 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1322,8 +1322,8 @@ static int xadc_remove(struct platform_device *pdev) iio_triggered_buffer_cleanup(indio_dev); } free_irq(xadc->irq, indio_dev); - clk_disable_unprepare(xadc->clk); cancel_delayed_work_sync(&xadc->zynq_unmask_work); + clk_disable_unprepare(xadc->clk); kfree(xadc->data); kfree(indio_dev->channels); -- GitLab From 3d02d7082e5823598090530c3988a35f69689943 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Wed, 13 Mar 2019 12:40:02 +0100 Subject: [PATCH 0046/1861] iio: cros_ec: Fix the maths for gyro scale calculation Calculation did not use IIO_DEGREE_TO_RAD and implemented a variant to avoid precision loss as we aim a nano value. The offset added to avoid rounding error, though, doesn't give us a close result to the expected value. E.g. For 1000dps, the result should be: (1000 * pi ) / 180 >> 15 ~= 0.000532632218 But with current calculation we get $ cat scale 0.000547890 Fix the calculation by just doing the maths involved for a nano value val * pi * 10e12 / (180 * 2^15) so we get a closer result. $ cat scale 0.000532632 Fixes: c14dca07a31d ("iio: cros_ec_sensors: add ChromeOS EC Contiguous Sensors driver") Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c index 89cb0066a6e08..8d76afb87d87c 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c @@ -103,9 +103,10 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev, * Do not use IIO_DEGREE_TO_RAD to avoid precision * loss. Round to the nearest integer. 
*/ - *val = div_s64(val64 * 314159 + 9000000ULL, 1000); - *val2 = 18000 << (CROS_EC_SENSOR_BITS - 1); - ret = IIO_VAL_FRACTIONAL; + *val = 0; + *val2 = div_s64(val64 * 3141592653ULL, + 180 << (CROS_EC_SENSOR_BITS - 1)); + ret = IIO_VAL_INT_PLUS_NANO; break; case MOTIONSENSE_TYPE_MAG: /* -- GitLab From f6a7bf2ccf22db3e1b936ebc03898f9c025c051e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2019 10:00:52 +0100 Subject: [PATCH 0047/1861] iio: pms7003: select IIO_TRIGGERED_BUFFER Without IIO_TRIGGERED_BUFFER, this driver fails to link: drivers/iio/chemical/pms7003.o: In function `pms7003_probe': pms7003.c:(.text+0x21c): undefined reference to `devm_iio_triggered_buffer_setup' pms7003.c:(.text+0x21c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `devm_iio_triggered_buffer_setup' Fixes: a1d642266c14 ("iio: chemical: add support for Plantower PMS7003 sensor") Signed-off-by: Arnd Bergmann Acked-by: Tomasz Duszynski Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index 5248e180b4421..92c684d2b67ec 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -64,6 +64,7 @@ config IAQCORE config PMS7003 tristate "Plantower PMS7003 particulate matter sensor" depends on SERIAL_DEV_BUS + select IIO_TRIGGERED_BUFFER help Say Y here to build support for the Plantower PMS7003 particulate matter sensor. -- GitLab From b53119f13a04879c3bf502828d99d13726639ead Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 6 Mar 2019 20:22:54 -0500 Subject: [PATCH 0048/1861] pin iocb through aio. aio_poll() is not the only case that needs file pinned; worse, while aio_read()/aio_write() can live without pinning iocb itself, the proof is rather brittle and can easily break on later changes. Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- fs/aio.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 38b741aef0bf5..07083fc0a24b0 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1022,6 +1022,9 @@ static bool get_reqs_available(struct kioctx *ctx) /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. + * + * The refcount is initialized to 2 - one for the async op completion, + * one for the synchronous code that does this. 
*/ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) { @@ -1034,7 +1037,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); - refcount_set(&req->ki_refcnt, 0); + refcount_set(&req->ki_refcnt, 2); req->ki_eventfd = NULL; return req; } @@ -1067,15 +1070,18 @@ out: return ret; } +static inline void iocb_destroy(struct aio_kiocb *iocb) +{ + if (iocb->ki_filp) + fput(iocb->ki_filp); + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); +} + static inline void iocb_put(struct aio_kiocb *iocb) { - if (refcount_read(&iocb->ki_refcnt) == 0 || - refcount_dec_and_test(&iocb->ki_refcnt)) { - if (iocb->ki_filp) - fput(iocb->ki_filp); - percpu_ref_put(&iocb->ki_ctx->reqs); - kmem_cache_free(kiocb_cachep, iocb); - } + if (refcount_dec_and_test(&iocb->ki_refcnt)) + iocb_destroy(iocb); } static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, @@ -1749,9 +1755,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) INIT_LIST_HEAD(&req->wait.entry); init_waitqueue_func_entry(&req->wait, aio_poll_wake); - /* one for removal from waitqueue, one for this function */ - refcount_set(&aiocb->ki_refcnt, 2); - mask = vfs_poll(req->file, &apt.pt) & req->events; if (unlikely(!req->head)) { /* we did not manage to set up a waitqueue, done */ @@ -1782,7 +1785,6 @@ out: if (mask) aio_poll_complete(aiocb, mask); - iocb_put(aiocb); return 0; } @@ -1873,18 +1875,21 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, break; } + /* Done with the synchronous reference */ + iocb_put(req); + /* * If ret is 0, we'd either done aio_complete() ourselves or have * arranged for that to be done asynchronously. Anything non-zero * means that we need to destroy req ourselves. */ - if (ret) - goto out_put_req; - return 0; + if (!ret) + return 0; + out_put_req: if (req->ki_eventfd) eventfd_ctx_put(req->ki_eventfd); - iocb_put(req); + iocb_destroy(req); out_put_reqs_available: put_reqs_available(ctx, 1); return ret; -- GitLab From 833f4154ed560232120bc475935ee1d6a20e159f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Mar 2019 19:00:36 -0400 Subject: [PATCH 0049/1861] aio: fold lookup_kiocb() into its sole caller Signed-off-by: Al Viro --- fs/aio.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 07083fc0a24b0..dada3bd316d50 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2002,24 +2002,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, } #endif -/* lookup_kiocb - * Finds a given iocb for cancellation. - */ -static struct aio_kiocb * -lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb) -{ - struct aio_kiocb *kiocb; - - assert_spin_locked(&ctx->ctx_lock); - - /* TODO: use a hash or array, this sucks. */ - list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { - if (kiocb->ki_user_iocb == iocb) - return kiocb; - } - return NULL; -} - /* sys_io_cancel: * Attempts to cancel an iocb previously passed to io_submit. If * the operation is successfully cancelled, the resulting event is @@ -2048,10 +2030,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return -EINVAL; spin_lock_irq(&ctx->ctx_lock); - kiocb = lookup_kiocb(ctx, iocb); - if (kiocb) { - ret = kiocb->ki_cancel(&kiocb->rw); - list_del_init(&kiocb->ki_list); + /* TODO: use a hash or array, this sucks. 
*/ + list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { + if (kiocb->ki_user_iocb == iocb) { + ret = kiocb->ki_cancel(&kiocb->rw); + list_del_init(&kiocb->ki_list); + break; + } } spin_unlock_irq(&ctx->ctx_lock); -- GitLab From a9339b7855094ba11a97e8822ae038135e879e79 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Mar 2019 19:43:45 -0500 Subject: [PATCH 0050/1861] aio: keep io_event in aio_kiocb We want to separate forming the resulting io_event from putting it into the ring buffer. Signed-off-by: Al Viro --- fs/aio.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index dada3bd316d50..b0a8aa544e34c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -204,8 +204,7 @@ struct aio_kiocb { struct kioctx *ki_ctx; kiocb_cancel_fn *ki_cancel; - struct iocb __user *ki_user_iocb; /* user's aiocb */ - __u64 ki_user_data; /* user's data for completion */ + struct io_event ki_res; struct list_head ki_list; /* the aio core uses this * for cancellation */ @@ -1084,15 +1083,6 @@ static inline void iocb_put(struct aio_kiocb *iocb) iocb_destroy(iocb); } -static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb, - long res, long res2) -{ - ev->obj = (u64)(unsigned long)iocb->ki_user_iocb; - ev->data = iocb->ki_user_data; - ev->res = res; - ev->res2 = res2; -} - /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1104,6 +1094,8 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) unsigned tail, pos, head; unsigned long flags; + iocb->ki_res.res = res; + iocb->ki_res.res2 = res2; /* * Add a completion event to the ring buffer. Must be done holding * ctx->completion_lock to prevent other code from messing with the tail @@ -1120,14 +1112,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; - aio_fill_event(event, iocb, res, res2); + *event = iocb->ki_res; kunmap_atomic(ev_page); flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); - pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", - ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data, - res, res2); + pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, + (void __user *)(unsigned long)iocb->ki_res.obj, + iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2); /* after flagging the request as done, we * must never even look at it again @@ -1844,8 +1836,10 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, goto out_put_req; } - req->ki_user_iocb = user_iocb; - req->ki_user_data = iocb->aio_data; + req->ki_res.obj = (u64)(unsigned long)user_iocb; + req->ki_res.data = iocb->aio_data; + req->ki_res.res = 0; + req->ki_res.res2 = 0; switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: @@ -2019,6 +2013,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct aio_kiocb *kiocb; int ret = -EINVAL; u32 key; + u64 obj = (u64)(unsigned long)iocb; if (unlikely(get_user(key, &iocb->aio_key))) return -EFAULT; @@ -2032,7 +2027,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, spin_lock_irq(&ctx->ctx_lock); /* TODO: use a hash or array, this sucks. 
*/ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) { - if (kiocb->ki_user_iocb == iocb) { + if (kiocb->ki_res.obj == obj) { ret = kiocb->ki_cancel(&kiocb->rw); list_del_init(&kiocb->ki_list); break; -- GitLab From 2bb874c0d873d13bd9b9b9c6d7b7c4edab18c8b4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Mar 2019 19:49:55 -0500 Subject: [PATCH 0051/1861] aio: store event at final iocb_put() Instead of having aio_complete() set ->ki_res.{res,res2}, do that explicitly in its callers, drop the reference (as aio_complete() used to do) and delay the rest until the final iocb_put(). Signed-off-by: Al Viro --- fs/aio.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index b0a8aa544e34c..f405e2fd8e28e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1077,16 +1077,10 @@ static inline void iocb_destroy(struct aio_kiocb *iocb) kmem_cache_free(kiocb_cachep, iocb); } -static inline void iocb_put(struct aio_kiocb *iocb) -{ - if (refcount_dec_and_test(&iocb->ki_refcnt)) - iocb_destroy(iocb); -} - /* aio_complete * Called when the io request on the given iocb is complete. */ -static void aio_complete(struct aio_kiocb *iocb, long res, long res2) +static void aio_complete(struct aio_kiocb *iocb) { struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; @@ -1094,8 +1088,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) unsigned tail, pos, head; unsigned long flags; - iocb->ki_res.res = res; - iocb->ki_res.res2 = res2; /* * Add a completion event to the ring buffer. Must be done holding * ctx->completion_lock to prevent other code from messing with the tail @@ -1161,7 +1153,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - iocb_put(iocb); +} + +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_dec_and_test(&iocb->ki_refcnt)) { + aio_complete(iocb); + iocb_destroy(iocb); + } } /* aio_read_events_ring @@ -1435,7 +1434,9 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) file_end_write(kiocb->ki_filp); } - aio_complete(iocb, res, res2); + iocb->ki_res.res = res; + iocb->ki_res.res2 = res2; + iocb_put(iocb); } static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) @@ -1583,11 +1584,10 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { - struct fsync_iocb *req = container_of(work, struct fsync_iocb, work); - int ret; + struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); - ret = vfs_fsync(req->file, req->datasync); - aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); + iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); + iocb_put(iocb); } static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, @@ -1608,7 +1608,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) { - aio_complete(iocb, mangle_poll(mask), 0); + iocb->ki_res.res = mangle_poll(mask); + iocb_put(iocb); } static void aio_poll_complete_work(struct work_struct *work) -- GitLab From af5c72b1fc7a00aa484e90b0c4e0eeb582545634 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Mar 2019 21:45:41 -0500 Subject: [PATCH 0052/1861] Fix aio_poll() races aio_poll() has to cope with several unpleasant problems: * requests that might stay around indefinitely need to be made visible for 
io_cancel(2); that must not be done to a request already completed, though. * in cases when ->poll() has placed us on a waitqueue, wakeup might have happened (and request completed) before ->poll() returns. * worse, in some early wakeup cases request might end up re-added into the queue later - we can't treat "woken up and currently not in the queue" as "it's not going to stick around indefinitely" * ... moreover, ->poll() might have decided not to put it on any queues to start with, and that needs to be distinguished from the previous case * ->poll() might have tried to put us on more than one queue. Only the first will succeed for aio poll, so we might end up missing wakeups. OTOH, we might very well notice that only after the wakeup hits and request gets completed (all before ->poll() gets around to the second poll_wait()). In that case it's too late to decide that we have an error. req->woken was an attempt to deal with that. Unfortunately, it was broken. What we need to keep track of is not that wakeup has happened - the thing might come back after that. It's that async reference is already gone and won't come back, so we can't (and needn't) put the request on the list of cancellables. The easiest case is "request hadn't been put on any waitqueues"; we can tell by seeing NULL apt.head, and in that case there won't be anything async. We should either complete the request ourselves (if vfs_poll() reports anything of interest) or return an error. In all other cases we get exclusion with wakeups by grabbing the queue lock. If request is currently on queue and we have something interesting from vfs_poll(), we can steal it and complete the request ourselves. If it's on queue and vfs_poll() has not reported anything interesting, we either put it on the cancellable list, or, if we know that it hadn't been put on all queues ->poll() wanted it on, we steal it and return an error. If it's _not_ on queue, it's either been already dealt with (in which case we do nothing), or there's aio_poll_complete_work() about to be executed. In that case we either put it on the cancellable list, or, if we know it hadn't been put on all queues ->poll() wanted it on, simulate what cancel would've done. It's a lot more convoluted than I'd like it to be. Single-consumer APIs suck, and unfortunately aio is not an exception... 
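For illustration, a minimal userspace sketch of how this aio_poll() path is driven (not part of the patch; the helper name is made up, but IOCB_CMD_POLL and the aio_buf poll-mask usage match the code being fixed):

#include <linux/aio_abi.h>
#include <poll.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* ctx must come from a prior io_setup(2) call. A concurrent io_cancel(2)
 * on this iocb is exactly the case that needs the request on the
 * cancellable list discussed above. */
static long submit_poll(aio_context_t ctx, int fd)
{
	struct iocb cb;
	struct iocb *list[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_POLL;
	cb.aio_buf = POLLIN;	/* poll mask; demangled in aio_poll() */
	/* optionally: cb.aio_flags = IOCB_FLAG_RESFD; cb.aio_resfd = efd;
	 * (efd being a hypothetical eventfd for completion notification) */
	return syscall(SYS_io_submit, ctx, 1L, list);
}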
Signed-off-by: Al Viro --- fs/aio.c | 90 +++++++++++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index f405e2fd8e28e..39a075992ca24 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -181,7 +181,7 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool woken; + bool done; bool cancelled; struct wait_queue_entry wait; struct work_struct work; @@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, return 0; } -static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - iocb->ki_res.res = mangle_poll(mask); - iocb_put(iocb); -} - static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struct work_struct *work) return; } list_del_init(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; spin_unlock_irq(&ctx->ctx_lock); - aio_poll_complete(iocb, mask); + iocb_put(iocb); } /* assumes we are called with irqs disabled */ @@ -1667,31 +1663,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, __poll_t mask = key_to_poll(key); unsigned long flags; - req->woken = true; - /* for instances that support it check for an event match first: */ - if (mask) { - if (!(mask & req->events)) - return 0; + if (mask && !(mask & req->events)) + return 0; + list_del_init(&req->wait.entry); + + if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { /* * Try to complete the iocb inline if we can. Use * irqsave/irqrestore because not all filesystems (e.g. fuse) * call this function with IRQs disabled and because IRQs * have to be disabled before ctx_lock is obtained. */ - if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { - list_del(&iocb->ki_list); - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); - - list_del_init(&req->wait.entry); - aio_poll_complete(iocb, mask); - return 1; - } + list_del(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; + spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); + iocb_put(iocb); + } else { + schedule_work(&req->work); } - - list_del_init(&req->wait.entry); - schedule_work(&req->work); return 1; } @@ -1723,6 +1715,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; struct aio_poll_table apt; + bool cancel = false; __poll_t mask; /* reject any unknown events outside the normal event mask. 
*/ @@ -1736,7 +1729,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->woken = false; + req->done = false; req->cancelled = false; apt.pt._qproc = aio_poll_queue_proc; @@ -1749,36 +1742,33 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) init_waitqueue_func_entry(&req->wait, aio_poll_wake); mask = vfs_poll(req->file, &apt.pt) & req->events; - if (unlikely(!req->head)) { - /* we did not manage to set up a waitqueue, done */ - goto out; - } - spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - if (req->woken) { - /* wake_up context handles the rest */ - mask = 0; + if (likely(req->head)) { + spin_lock(&req->head->lock); + if (unlikely(list_empty(&req->wait.entry))) { + if (apt.error) + cancel = true; + apt.error = 0; + mask = 0; + } + if (mask || apt.error) { + list_del_init(&req->wait.entry); + } else if (cancel) { + WRITE_ONCE(req->cancelled, true); + } else if (!req->done) { /* actually waiting for an event */ + list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; + } + spin_unlock(&req->head->lock); + } + if (mask) { /* no async, we'd stolen it */ + aiocb->ki_res.res = mangle_poll(mask); apt.error = 0; - } else if (mask || apt.error) { - /* if we get an error or a mask we are done */ - WARN_ON_ONCE(list_empty(&req->wait.entry)); - list_del_init(&req->wait.entry); - } else { - /* actually waiting for an event */ - list_add_tail(&aiocb->ki_list, &ctx->active_reqs); - aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); spin_unlock_irq(&ctx->ctx_lock); - -out: - if (unlikely(apt.error)) - return apt.error; - if (mask) - aio_poll_complete(aiocb, mask); - return 0; + iocb_put(aiocb); + return apt.error; } static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, -- GitLab From 958c13ce141cd5183d3995553315d0ed27daa823 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Mar 2019 18:13:00 -0500 Subject: [PATCH 0053/1861] make aio_read()/aio_write() return int that ssize_t is a rudiment of earlier calling conventions; it's been used only to pass 0 and -E... since last autumn. 
Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 39a075992ca24..0e0b939958fa2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1513,13 +1513,13 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret) } } -static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, +static int aio_read(struct kiocb *req, const struct iocb *iocb, bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; struct file *file; - ssize_t ret; + int ret; ret = aio_prep_rw(req, iocb); if (ret) @@ -1541,13 +1541,13 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb, return ret; } -static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb, +static int aio_write(struct kiocb *req, const struct iocb *iocb, bool vectored, bool compat) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; struct file *file; - ssize_t ret; + int ret; ret = aio_prep_rw(req, iocb); if (ret) @@ -1710,7 +1710,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, add_wait_queue(head, &pt->iocb->poll.wait); } -static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) +static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) { struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; @@ -1775,7 +1775,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, struct iocb __user *user_iocb, bool compat) { struct aio_kiocb *req; - ssize_t ret; + int ret; /* enforce forwards compatibility on users */ if (unlikely(iocb->aio_reserved2)) { -- GitLab From 7425970347a21204632a27ed28978cf875f205b2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Mar 2019 18:18:31 -0500 Subject: [PATCH 0054/1861] aio: move dropping ->ki_eventfd into iocb_destroy() no reason to duplicate that... Signed-off-by: Al Viro --- fs/aio.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 0e0b939958fa2..d3837f607d098 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1071,6 +1071,8 @@ out: static inline void iocb_destroy(struct aio_kiocb *iocb) { + if (iocb->ki_eventfd) + eventfd_ctx_put(iocb->ki_eventfd); if (iocb->ki_filp) fput(iocb->ki_filp); percpu_ref_put(&iocb->ki_ctx->reqs); @@ -1138,10 +1140,8 @@ static void aio_complete(struct aio_kiocb *iocb) * eventfd. The eventfd_signal() function is safe to be called * from IRQ context. */ - if (iocb->ki_eventfd) { + if (iocb->ki_eventfd) eventfd_signal(iocb->ki_eventfd, 1); - eventfd_ctx_put(iocb->ki_eventfd); - } /* * We have to order our ring_info tail store above and test @@ -1807,18 +1807,19 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, goto out_put_req; if (iocb->aio_flags & IOCB_FLAG_RESFD) { + struct eventfd_ctx *eventfd; /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an * instance of the file* now. The file descriptor must be * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. 
*/ - req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); - if (IS_ERR(req->ki_eventfd)) { - ret = PTR_ERR(req->ki_eventfd); - req->ki_eventfd = NULL; + eventfd = eventfd_ctx_fdget(iocb->aio_resfd); + if (IS_ERR(eventfd)) { + ret = PTR_ERR(eventfd); goto out_put_req; } + req->ki_eventfd = eventfd; } ret = put_user(KIOCB_KEY, &user_iocb->aio_key); @@ -1872,8 +1873,6 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, return 0; out_put_req: - if (req->ki_eventfd) - eventfd_ctx_put(req->ki_eventfd); iocb_destroy(req); out_put_reqs_available: put_reqs_available(ctx, 1); -- GitLab From fa0ca2aee3bec899f9b9e753baf3808d1b0628f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Mar 2019 18:21:08 -0500 Subject: [PATCH 0055/1861] deal with get_reqs_available() in aio_get_req() itself simplifies the caller Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index d3837f607d098..eee4b4cfb66fe 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1033,6 +1033,11 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; + if (unlikely(!get_reqs_available(ctx))) { + kfree(req); + return NULL; + } + percpu_ref_get(&ctx->reqs); req->ki_ctx = ctx; INIT_LIST_HEAD(&req->ki_list); @@ -1793,13 +1798,9 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, return -EINVAL; } - if (!get_reqs_available(ctx)) - return -EAGAIN; - - ret = -EAGAIN; req = aio_get_req(ctx); if (unlikely(!req)) - goto out_put_reqs_available; + return -EAGAIN; req->ki_filp = fget(iocb->aio_fildes); ret = -EBADF; @@ -1874,7 +1875,6 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, out_put_req: iocb_destroy(req); -out_put_reqs_available: put_reqs_available(ctx, 1); return ret; } -- GitLab From 7316b49c2a117ca0611bc9af779d2108b764a7f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 Mar 2019 18:24:51 -0500 Subject: [PATCH 0056/1861] aio: move sanity checks and request allocation to io_submit_one() makes for somewhat cleaner control flow in __io_submit_one() Signed-off-by: Al Viro --- fs/aio.c | 119 +++++++++++++++++++++++++------------------------------ 1 file changed, 53 insertions(+), 66 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index eee4b4cfb66fe..a4cc2a1cccb75 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1777,35 +1777,12 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, - struct iocb __user *user_iocb, bool compat) + struct iocb __user *user_iocb, struct aio_kiocb *req, + bool compat) { - struct aio_kiocb *req; - int ret; - - /* enforce forwards compatibility on users */ - if (unlikely(iocb->aio_reserved2)) { - pr_debug("EINVAL: reserve field set\n"); - return -EINVAL; - } - - /* prevent overflows */ - if (unlikely( - (iocb->aio_buf != (unsigned long)iocb->aio_buf) || - (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || - ((ssize_t)iocb->aio_nbytes < 0) - )) { - pr_debug("EINVAL: overflow check\n"); - return -EINVAL; - } - - req = aio_get_req(ctx); - if (unlikely(!req)) - return -EAGAIN; - req->ki_filp = fget(iocb->aio_fildes); - ret = -EBADF; if (unlikely(!req->ki_filp)) - goto out_put_req; + return -EBADF; if (iocb->aio_flags & IOCB_FLAG_RESFD) { struct eventfd_ctx *eventfd; @@ -1816,17 +1793,15 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, * event using the eventfd_signal() function. 
*/ eventfd = eventfd_ctx_fdget(iocb->aio_resfd); - if (IS_ERR(eventfd)) { - ret = PTR_ERR(eventfd); - goto out_put_req; - } + if (IS_ERR(eventfd)) + return PTR_ERR(req->ki_eventfd); + req->ki_eventfd = eventfd; } - ret = put_user(KIOCB_KEY, &user_iocb->aio_key); - if (unlikely(ret)) { + if (unlikely(put_user(KIOCB_KEY, &user_iocb->aio_key))) { pr_debug("EFAULT: aio_key\n"); - goto out_put_req; + return -EFAULT; } req->ki_res.obj = (u64)(unsigned long)user_iocb; @@ -1836,58 +1811,70 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb, switch (iocb->aio_lio_opcode) { case IOCB_CMD_PREAD: - ret = aio_read(&req->rw, iocb, false, compat); - break; + return aio_read(&req->rw, iocb, false, compat); case IOCB_CMD_PWRITE: - ret = aio_write(&req->rw, iocb, false, compat); - break; + return aio_write(&req->rw, iocb, false, compat); case IOCB_CMD_PREADV: - ret = aio_read(&req->rw, iocb, true, compat); - break; + return aio_read(&req->rw, iocb, true, compat); case IOCB_CMD_PWRITEV: - ret = aio_write(&req->rw, iocb, true, compat); - break; + return aio_write(&req->rw, iocb, true, compat); case IOCB_CMD_FSYNC: - ret = aio_fsync(&req->fsync, iocb, false); - break; + return aio_fsync(&req->fsync, iocb, false); case IOCB_CMD_FDSYNC: - ret = aio_fsync(&req->fsync, iocb, true); - break; + return aio_fsync(&req->fsync, iocb, true); case IOCB_CMD_POLL: - ret = aio_poll(req, iocb); - break; + return aio_poll(req, iocb); default: pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); - ret = -EINVAL; - break; + return -EINVAL; } - - /* Done with the synchronous reference */ - iocb_put(req); - - /* - * If ret is 0, we'd either done aio_complete() ourselves or have - * arranged for that to be done asynchronously. Anything non-zero - * means that we need to destroy req ourselves. - */ - if (!ret) - return 0; - -out_put_req: - iocb_destroy(req); - put_reqs_available(ctx, 1); - return ret; } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { + struct aio_kiocb *req; struct iocb iocb; + int err; if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb)))) return -EFAULT; - return __io_submit_one(ctx, &iocb, user_iocb, compat); + /* enforce forwards compatibility on users */ + if (unlikely(iocb.aio_reserved2)) { + pr_debug("EINVAL: reserve field set\n"); + return -EINVAL; + } + + /* prevent overflows */ + if (unlikely( + (iocb.aio_buf != (unsigned long)iocb.aio_buf) || + (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) || + ((ssize_t)iocb.aio_nbytes < 0) + )) { + pr_debug("EINVAL: overflow check\n"); + return -EINVAL; + } + + req = aio_get_req(ctx); + if (unlikely(!req)) + return -EAGAIN; + + err = __io_submit_one(ctx, &iocb, user_iocb, req, compat); + + /* Done with the synchronous reference */ + iocb_put(req); + + /* + * If err is 0, we'd either done aio_complete() ourselves or have + * arranged for that to be done asynchronously. Anything non-zero + * means that we need to destroy req ourselves. + */ + if (unlikely(err)) { + iocb_destroy(req); + put_reqs_available(ctx, 1); + } + return err; } /* sys_io_submit: -- GitLab From 924726888f660b2a86382a5dd051ec9ca1b18190 Mon Sep 17 00:00:00 2001 From: "Leonidas P. 
Papadakos" Date: Fri, 1 Mar 2019 00:29:23 +0200 Subject: [PATCH 0057/1861] arm64: dts: rockchip: fix rk3328-roc-cc gmac2io tx/rx_delay The rk3328-roc-cc board exhibits tx stability issues with large packets, as does the rock64 board, which was fixed with this patch https://patchwork.kernel.org/patch/10178969/ A similar patch was merged for the rk3328-roc-cc here https://patchwork.kernel.org/patch/10804863/ but it doesn't include the tx/rx_delay tweaks, and I find that they help with an issue where large transfers would bring the ethernet link down, causing a link reset regularly. Signed-off-by: Leonidas P. Papadakos Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index 33c44e857247e..0e34354b20927 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -108,8 +108,8 @@ snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x25>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; -- GitLab From eb523a4960b6d61bfda1229907ea58841f0340ae Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 18 Feb 2019 15:59:26 -0300 Subject: [PATCH 0058/1861] arm64: dts: rockchip: add DDC bus on Rock Pi 4 A DDC I2C bus specifier is required for DDC EDID probing to work properly. Fixes: 1b5715c602fda ("arm64: dts: rockchip: add ROCK Pi 4 DTS support") Signed-off-by: Ezequiel Garcia Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts index 4a543f2117d42..844eac939a97c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts @@ -158,6 +158,7 @@ }; &hdmi { + ddc-i2c-bus = <&i2c3>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_cec>; status = "okay"; -- GitLab From 6b2fde3dbfab6ebc45b0cd605e17ca5057ff9a3b Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 24 Feb 2019 21:51:22 +0000 Subject: [PATCH 0059/1861] ARM: dts: rockchip: fix rk3288 cpu opp node reference The following error can be seen during boot: of: /cpus/cpu@501: Couldn't find opp node Change cpu nodes to use operating-points-v2 in order to fix this. 
Fixes: ce76de984649 ("ARM: dts: rockchip: convert rk3288 to operating-points-v2") Cc: stable@vger.kernel.org Signed-off-by: Jonas Karlman Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index ca7d52daa8fb6..09868dcee34b9 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -70,7 +70,7 @@ compatible = "arm,cortex-a12"; reg = <0x501>; resets = <&cru SRST_CORE1>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; @@ -80,7 +80,7 @@ compatible = "arm,cortex-a12"; reg = <0x502>; resets = <&cru SRST_CORE2>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; @@ -90,7 +90,7 @@ compatible = "arm,cortex-a12"; reg = <0x503>; resets = <&cru SRST_CORE3>; - operating-points = <&cpu_opp_table>; + operating-points-v2 = <&cpu_opp_table>; #cooling-cells = <2>; /* min followed by max */ clock-latency = <40000>; clocks = <&cru ARMCLK>; -- GitLab From a8772e5d826d0f61f8aa9c284b3ab49035d5273d Mon Sep 17 00:00:00 2001 From: Tomohiro Mayama Date: Sun, 10 Mar 2019 01:10:12 +0900 Subject: [PATCH 0060/1861] arm64: dts: rockchip: Fix vcc_host1_5v GPIO polarity on rk3328-rock64 This patch makes the USB ports function again. Fixes: 955bebde057e ("arm64: dts: rockchip: add rk3328-rock64 board") Cc: stable@vger.kernel.org Suggested-by: Robin Murphy Signed-off-by: Tomohiro Mayama Tested-by: Katsuhiro Suzuki Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 2157a528276bf..79b4d1d4b5d6b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -46,8 +46,7 @@ vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator { compatible = "regulator-fixed"; - enable-active-high; - gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>; + gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&usb20_host_drv>; regulator-name = "vcc_host1_5v"; -- GitLab From 8dbc4d5ddb59f49cb3e85bccf42a4720b27a6576 Mon Sep 17 00:00:00 2001 From: David Summers Date: Sat, 9 Mar 2019 15:39:21 +0000 Subject: [PATCH 0061/1861] ARM: dts: rockchip: Fix SD card detection on rk3288-tinker The Problem: On ASUS Tinker Board S, when booting from the eMMC with a card in the sd slot, there are constant errors. Also, after a warm reboot, U-Boot cannot access the sd slot. Cause: Identified by Robin Murphy @ ARM. The Card Detect on rk3288 devices is pulled up by vccio-sd; so when the regulator powers this off, card detect gives spurious errors. A second problem, identified by Jonas Karlman, is that vccio-sd appears to be powered down during reboot, which then breaks warm rebooting from the sd card. History: A common fault on these rk3288 boards, which implement the reference design. When this arose before: http://lists.infradead.org/pipermail/linux-arm-kernel/2014-August/281153.html Ulf and Jaehoon clearly said this was a broken card detect design, which should be solved via polling. Solution: Hence broken-cd is set as a property. This cures the errors.
The powering down of vccio-sd during reboot is cured by adding regulator-boot-on. This solution has been fairly widely reviewed and tested. Fixes: e58c5e739d6f ("ARM: dts: rockchip: move shared tinker-board nodes to a common dtsi") Cc: stable@vger.kernel.org [Heiko: slightly inaccurate fixes but tinker is an sbc (aka like a Pi) where we can hopefully expect people not to rely on overly old stable kernels] Signed-off-by: David Summers Reviewed-by: Jonas Karlman Tested-by: Jonas Karlman Reviewed-by: Robin Murphy Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-tinker.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288-tinker.dtsi b/arch/arm/boot/dts/rk3288-tinker.dtsi index aa107ee41b8b8..ef653c3209bcc 100644 --- a/arch/arm/boot/dts/rk3288-tinker.dtsi +++ b/arch/arm/boot/dts/rk3288-tinker.dtsi @@ -254,6 +254,7 @@ }; vccio_sd: LDO_REG5 { + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-name = "vccio_sd"; @@ -430,7 +431,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; - card-detect-delay = <200>; + broken-cd; disable-wp; /* wp not hooked up */ pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; -- GitLab From 6fd8b9780ec1a49ac46e0aaf8775247205e66231 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Wed, 13 Mar 2019 18:45:36 +0000 Subject: [PATCH 0062/1861] arm64: dts: rockchip: fix rk3328 rgmii high tx error rate Several rk3328 based boards experience high rgmii tx error rates. This is due to several pins in the rk3328.dtsi rgmii pinmux that are missing a defined pull strength setting. This causes the pinmux driver to default to 2ma (bit mask 00). These pins are only defined in the rk3328.dtsi, and are not listed in the rk3328 specification. The TRM only lists them as "Reserved" (RK3328 TRM V1.1, 3.3.3 Detail Register Description, GRF_GPIO0B_IOMUX, GRF_GPIO0C_IOMUX, GRF_GPIO0D_IOMUX). However, removal of these pins from the rgmii pinmux definition causes the interface to fail to transmit. Also, the rgmii tx and rx pins defined in the dtsi are not consistent with the rk3328 specification, with tx pins currently set to 12ma and rx pins set to 2ma. Fix this by setting the tx pins to 8ma and the rx pins to 4ma, consistent with the specification. Defining the drive strength for the undefined pins eliminated the high tx packet error rate observed under heavy data transfers. Aligning the drive strength to the TRM values eliminated the occasional packet retry errors under iperf3 testing. This allows much higher data rates with no recorded tx errors. Tested on the rk3328-roc-cc board.
Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Peter Geis Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 44 ++++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 84f14b132e8f5..c55a3f1a87ff6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1642,50 +1642,50 @@ rgmiim1_pins: rgmiim1-pins { rockchip,pins = /* mac_txclk */ - <1 RK_PB4 2 &pcfg_pull_none_12ma>, + <1 RK_PB4 2 &pcfg_pull_none_8ma>, /* mac_rxclk */ - <1 RK_PB5 2 &pcfg_pull_none_2ma>, + <1 RK_PB5 2 &pcfg_pull_none_4ma>, /* mac_mdio */ - <1 RK_PC3 2 &pcfg_pull_none_2ma>, + <1 RK_PC3 2 &pcfg_pull_none_4ma>, /* mac_txen */ - <1 RK_PD1 2 &pcfg_pull_none_12ma>, + <1 RK_PD1 2 &pcfg_pull_none_8ma>, /* mac_clk */ - <1 RK_PC5 2 &pcfg_pull_none_2ma>, + <1 RK_PC5 2 &pcfg_pull_none_4ma>, /* mac_rxdv */ - <1 RK_PC6 2 &pcfg_pull_none_2ma>, + <1 RK_PC6 2 &pcfg_pull_none_4ma>, /* mac_mdc */ - <1 RK_PC7 2 &pcfg_pull_none_2ma>, + <1 RK_PC7 2 &pcfg_pull_none_4ma>, /* mac_rxd1 */ - <1 RK_PB2 2 &pcfg_pull_none_2ma>, + <1 RK_PB2 2 &pcfg_pull_none_4ma>, /* mac_rxd0 */ - <1 RK_PB3 2 &pcfg_pull_none_2ma>, + <1 RK_PB3 2 &pcfg_pull_none_4ma>, /* mac_txd1 */ - <1 RK_PB0 2 &pcfg_pull_none_12ma>, + <1 RK_PB0 2 &pcfg_pull_none_8ma>, /* mac_txd0 */ - <1 RK_PB1 2 &pcfg_pull_none_12ma>, + <1 RK_PB1 2 &pcfg_pull_none_8ma>, /* mac_rxd3 */ - <1 RK_PB6 2 &pcfg_pull_none_2ma>, + <1 RK_PB6 2 &pcfg_pull_none_4ma>, /* mac_rxd2 */ - <1 RK_PB7 2 &pcfg_pull_none_2ma>, + <1 RK_PB7 2 &pcfg_pull_none_4ma>, /* mac_txd3 */ - <1 RK_PC0 2 &pcfg_pull_none_12ma>, + <1 RK_PC0 2 &pcfg_pull_none_8ma>, /* mac_txd2 */ - <1 RK_PC1 2 &pcfg_pull_none_12ma>, + <1 RK_PC1 2 &pcfg_pull_none_8ma>, /* mac_txclk */ - <0 RK_PB0 1 &pcfg_pull_none>, + <0 RK_PB0 1 &pcfg_pull_none_8ma>, /* mac_txen */ - <0 RK_PB4 1 &pcfg_pull_none>, + <0 RK_PB4 1 &pcfg_pull_none_8ma>, /* mac_clk */ - <0 RK_PD0 1 &pcfg_pull_none>, + <0 RK_PD0 1 &pcfg_pull_none_4ma>, /* mac_txd1 */ - <0 RK_PC0 1 &pcfg_pull_none>, + <0 RK_PC0 1 &pcfg_pull_none_8ma>, /* mac_txd0 */ - <0 RK_PC1 1 &pcfg_pull_none>, + <0 RK_PC1 1 &pcfg_pull_none_8ma>, /* mac_txd3 */ - <0 RK_PC7 1 &pcfg_pull_none>, + <0 RK_PC7 1 &pcfg_pull_none_8ma>, /* mac_txd2 */ - <0 RK_PC6 1 &pcfg_pull_none>; + <0 RK_PC6 1 &pcfg_pull_none_8ma>; }; rmiim1_pins: rmiim1-pins { -- GitLab From 09f91381fa5de1d44bc323d8bf345f5d57b3d9b5 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Wed, 13 Mar 2019 19:02:30 +0000 Subject: [PATCH 0063/1861] arm64: dts: rockchip: fix rk3328 sdmmc0 write errors Various rk3328 based boards experience occasional sdmmc0 write errors. This is due to the rk3328.dtsi tx drive levels being set to 4ma, vs 8ma per the rk3328 datasheet default settings. Fix this by setting the tx signal pins to 8ma. Inspiration from tonymac32's patch, https://github.com/ayufan-rock64/linux-kernel/commit/dc1212b347e0da17c5460bcc0a56b07d02bac3f8 Fixes issues on the rk3328-roc-cc and the rk3328-rock64 (as per the above commit message). Tested on the rk3328-roc-cc board. 
Fixes: 52e02d377a72 ("arm64: dts: rockchip: add core dtsi file for RK3328 SoCs") Cc: stable@vger.kernel.org Signed-off-by: Peter Geis Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index c55a3f1a87ff6..dabef1a21649b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -1445,11 +1445,11 @@ sdmmc0 { sdmmc0_clk: sdmmc0-clk { - rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_4ma>; + rockchip,pins = <1 RK_PA6 1 &pcfg_pull_none_8ma>; }; sdmmc0_cmd: sdmmc0-cmd { - rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA4 1 &pcfg_pull_up_8ma>; }; sdmmc0_dectn: sdmmc0-dectn { @@ -1461,14 +1461,14 @@ }; sdmmc0_bus1: sdmmc0-bus1 { - rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>; }; sdmmc0_bus4: sdmmc0-bus4 { - rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_4ma>, - <1 RK_PA1 1 &pcfg_pull_up_4ma>, - <1 RK_PA2 1 &pcfg_pull_up_4ma>, - <1 RK_PA3 1 &pcfg_pull_up_4ma>; + rockchip,pins = <1 RK_PA0 1 &pcfg_pull_up_8ma>, + <1 RK_PA1 1 &pcfg_pull_up_8ma>, + <1 RK_PA2 1 &pcfg_pull_up_8ma>, + <1 RK_PA3 1 &pcfg_pull_up_8ma>; }; sdmmc0_gpio: sdmmc0-gpio { -- GitLab From 6b538cc21334b83f09b25dec4aa2d2726bf07ed0 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Fri, 15 Mar 2019 20:09:10 +0100 Subject: [PATCH 0064/1861] HID: steam: fix deadlock with input devices. When using this driver with the wireless dongle and some usermode program that monitors every input device (acpid, for example), while another usermode client opens and closes the low-level device repeadedly, the system eventually deadlocks. The reason is that steam_input_register_device() must not be called with the mutex held, because the input subsystem has its own synchronization that clashes with this one: it is possible that steam_input_open() is called before input_register_device() returns, and since steam_input_open() needs to lock the mutex, it deadlocks. However we must hold the mutex when calling any function that sends commands to the controller. If not, random commands end up falling fail. Reported-by: Simon Gene Gottlieb Signed-off-by: Rodrigo Rivas Costa Tested-by: Simon Gene Gottlieb Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 8141cadfca0e3..8dae0f9b819e0 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -499,6 +499,7 @@ static void steam_battery_unregister(struct steam_device *steam) static int steam_register(struct steam_device *steam) { int ret; + bool client_opened; /* * This function can be called several times in a row with the @@ -511,9 +512,11 @@ static int steam_register(struct steam_device *steam) * Unlikely, but getting the serial could fail, and it is not so * important, so make up a serial number and go on. 
*/ + mutex_lock(&steam->mutex); if (steam_get_serial(steam) < 0) strlcpy(steam->serial_no, "XXXXXXXXXX", sizeof(steam->serial_no)); + mutex_unlock(&steam->mutex); hid_info(steam->hdev, "Steam Controller '%s' connected", steam->serial_no); @@ -528,13 +531,15 @@ static int steam_register(struct steam_device *steam) } mutex_lock(&steam->mutex); - if (!steam->client_opened) { + client_opened = steam->client_opened; + if (!client_opened) steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); + + if (!client_opened) ret = steam_input_register(steam); - } else { + else ret = 0; - } - mutex_unlock(&steam->mutex); return ret; } @@ -630,14 +635,21 @@ static void steam_client_ll_close(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; + unsigned long flags; + bool connected; + + spin_lock_irqsave(&steam->lock, flags); + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + mutex_lock(&steam->mutex); steam->client_opened = false; + if (connected) + steam_set_lizard_mode(steam, lizard_mode); mutex_unlock(&steam->mutex); - if (steam->connected) { - steam_set_lizard_mode(steam, lizard_mode); + if (connected) steam_input_register(steam); - } } static int steam_client_ll_raw_request(struct hid_device *hdev, -- GitLab From 94a9992f7dbdfb28976b565af220e0c4a117144a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 8 Mar 2019 13:11:17 +0800 Subject: [PATCH 0065/1861] HID: Increase maximum report size allowed by hid_field_extract() Commit 71f6fa90a353 ("HID: increase maximum global item tag report size to 256") increases the max report size from 128 to 256. We also need to update the report size in hid_field_extract() otherwise it complains and truncates now valid report size: [ 406.165461] hid-sensor-hub 001F:8086:22D8.0002: hid_field_extract() called with n (192) > 32! (kworker/5:1) BugLink: https://bugs.launchpad.net/bugs/1818547 Fixes: 71f6fa90a353 ("HID: increase maximum global item tag report size to 256") Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9993b692598fb..860e21ec6a492 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1301,10 +1301,10 @@ static u32 __extract(u8 *report, unsigned offset, int n) u32 hid_field_extract(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n) { - if (n > 32) { - hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n", + if (n > 256) { + hid_warn(hid, "hid_field_extract() called with n (%d) > 256! (%s)\n", n, current->comm); - n = 32; + n = 256; } return __extract(report, offset, n); -- GitLab From 9729e3b65a64dfe210972223624d8152ba502e98 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 7 Mar 2019 10:35:58 +0800 Subject: [PATCH 0066/1861] ASoC: mediatek: mt8183: skip for i2s5 in mck_disable Skip for i2s5 in mck_disable which is also bypassed in mck_enable. 
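For context, a sketch of the invariant the fix below restores (wrapper names are hypothetical; the real check uses MT8183_I2S5_MCK): enable and disable must bail out on the same ids, otherwise disable unprepares a clock that enable never prepared, which the common clk framework warns about at runtime.

#include <linux/clk.h>
#include <linux/printk.h>

static void example_mck_enable(struct clk *clk, int mck_id, int skip_id)
{
	if (mck_id == skip_id)		/* e.g. an unsupported i2s5 mck */
		return;
	if (clk_prepare_enable(clk))
		pr_warn("example: clk enable failed\n");
}

static void example_mck_disable(struct clk *clk, int mck_id, int skip_id)
{
	if (mck_id == skip_id)		/* must mirror the enable path */
		return;
	clk_disable_unprepare(clk);
}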
Signed-off-by: Tzung-Bi Shih Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8183/mt8183-afe-clk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/mediatek/mt8183/mt8183-afe-clk.c b/sound/soc/mediatek/mt8183/mt8183-afe-clk.c index f523ad103acc4..48e81c5d52fc2 100644 --- a/sound/soc/mediatek/mt8183/mt8183-afe-clk.c +++ b/sound/soc/mediatek/mt8183/mt8183-afe-clk.c @@ -605,6 +605,10 @@ void mt8183_mck_disable(struct mtk_base_afe *afe, int mck_id) int m_sel_id = mck_div[mck_id].m_sel_id; int div_clk_id = mck_div[mck_id].div_clk_id; + /* i2s5 mck not support */ + if (mck_id == MT8183_I2S5_MCK) + return; + clk_disable_unprepare(afe_priv->clk[div_clk_id]); if (m_sel_id >= 0) clk_disable_unprepare(afe_priv->clk[m_sel_id]); -- GitLab From 4834d7070c85a5fb69637265dbbb05d13043280c Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 8 Mar 2019 11:36:08 +0800 Subject: [PATCH 0067/1861] ASoC: rt5682: Check JD status when system resume The IRQ function may not work when system suspend. We remove snd_soc_dapm_force_enable_pin function call to make sure the bias off when idle and run into suspend/resume function. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 9d5acd2d04abd..b7e2e9937e6d4 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -910,13 +910,20 @@ static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); unsigned int val, count; if (jack_insert) { - snd_soc_dapm_force_enable_pin(dapm, "CBJ Power"); - snd_soc_dapm_sync(dapm); + + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, + RT5682_PWR_VREF2, RT5682_PWR_VREF2); + snd_soc_component_update_bits(component, + RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0); + usleep_range(15000, 20000); + snd_soc_component_update_bits(component, + RT5682_PWR_ANLG_1, RT5682_PWR_FV2, RT5682_PWR_FV2); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, + RT5682_PWR_CBJ, RT5682_PWR_CBJ); + snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH); @@ -944,8 +951,10 @@ static int rt5682_headset_detect(struct snd_soc_component *component, rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - snd_soc_dapm_disable_pin(dapm, "CBJ Power"); - snd_soc_dapm_sync(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, + RT5682_PWR_VREF2, 0); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, + RT5682_PWR_CBJ, 0); rt5682->jack_type = 0; } @@ -1591,8 +1600,6 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = { 0, NULL, 0), SND_SOC_DAPM_SUPPLY("Vref1", RT5682_PWR_ANLG_1, RT5682_PWR_VREF1_BIT, 0, rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), - SND_SOC_DAPM_SUPPLY("Vref2", RT5682_PWR_ANLG_1, RT5682_PWR_VREF2_BIT, 0, - rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU), /* ASRC */ SND_SOC_DAPM_SUPPLY_S("DAC STO1 ASRC", 1, RT5682_PLL_TRACK_1, @@ -1627,9 +1634,6 @@ static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = { SND_SOC_DAPM_PGA("BST1 CBJ", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("CBJ Power", RT5682_PWR_ANLG_3, - 
RT5682_PWR_CBJ_BIT, 0, NULL, 0), - /* REC Mixer */ SND_SOC_DAPM_MIXER("RECMIX1L", SND_SOC_NOPM, 0, 0, rt5682_rec1_l_mix, ARRAY_SIZE(rt5682_rec1_l_mix)), @@ -1792,17 +1796,13 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { /*Vref*/ {"MICBIAS1", NULL, "Vref1"}, - {"MICBIAS1", NULL, "Vref2"}, {"MICBIAS2", NULL, "Vref1"}, - {"MICBIAS2", NULL, "Vref2"}, {"CLKDET SYS", NULL, "CLKDET"}, {"IN1P", NULL, "LDO2"}, {"BST1 CBJ", NULL, "IN1P"}, - {"BST1 CBJ", NULL, "CBJ Power"}, - {"CBJ Power", NULL, "Vref2"}, {"RECMIX1L", "CBJ Switch", "BST1 CBJ"}, {"RECMIX1L", NULL, "RECMIX1L Power"}, @@ -1912,9 +1912,7 @@ static const struct snd_soc_dapm_route rt5682_dapm_routes[] = { {"HP Amp", NULL, "Capless"}, {"HP Amp", NULL, "Charge Pump"}, {"HP Amp", NULL, "CLKDET SYS"}, - {"HP Amp", NULL, "CBJ Power"}, {"HP Amp", NULL, "Vref1"}, - {"HP Amp", NULL, "Vref2"}, {"HPOL Playback", "Switch", "HP Amp"}, {"HPOR Playback", "Switch", "HP Amp"}, {"HPOL", NULL, "HPOL Playback"}, @@ -2363,6 +2361,8 @@ static int rt5682_resume(struct snd_soc_component *component) regcache_cache_only(rt5682->regmap, false); regcache_sync(rt5682->regmap); + rt5682_irq(0, rt5682); + return 0; } #else -- GitLab From 675212bfb23394514b7f68ebf3954ba936281ccc Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 18 Mar 2019 15:17:13 +0800 Subject: [PATCH 0068/1861] ASoC: rt5682: fix jack type detection issue The jack type detection needs the main bias power of analog. The modification makes sure the main bias power on/off while jack plug/unplug. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index b7e2e9937e6d4..b98974edf6ad1 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -915,7 +915,8 @@ static int rt5682_headset_detect(struct snd_soc_component *component, if (jack_insert) { snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, - RT5682_PWR_VREF2, RT5682_PWR_VREF2); + RT5682_PWR_VREF2 | RT5682_PWR_MB, + RT5682_PWR_VREF2 | RT5682_PWR_MB); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0); usleep_range(15000, 20000); @@ -952,7 +953,7 @@ static int rt5682_headset_detect(struct snd_soc_component *component, snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, - RT5682_PWR_VREF2, 0); + RT5682_PWR_VREF2 | RT5682_PWR_MB, 0); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, RT5682_PWR_CBJ, 0); @@ -2301,16 +2302,13 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, switch (level) { case SND_SOC_BIAS_PREPARE: regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB | RT5682_PWR_BG, - RT5682_PWR_MB | RT5682_PWR_BG); + RT5682_PWR_BG, RT5682_PWR_BG); regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO); break; case SND_SOC_BIAS_STANDBY: - regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB, RT5682_PWR_MB); regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL, RT5682_DIG_GATE_CTRL); break; @@ -2318,7 +2316,7 @@ static int rt5682_set_bias_level(struct snd_soc_component *component, regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1, RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, 0); regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1, - RT5682_PWR_MB | 
RT5682_PWR_BG, 0); + RT5682_PWR_BG, 0); break; default: -- GitLab From 1c5b6a27e432e4fe170a924c8b41012271496a4c Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 18 Mar 2019 15:17:42 +0800 Subject: [PATCH 0069/1861] ASoC: rt5682: recording has no sound after booting If ASRC turns on, HW will use clk_dac as the reference clock whether recording or playback. Both of clk_dac and clk_adc should set proper clock while using ASRC. Signed-off-by: Shuming Fan Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index b98974edf6ad1..86a7fa31c294b 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -1208,7 +1208,7 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); - int ref, val, reg, sft, mask, idx = -EINVAL; + int ref, val, reg, idx = -EINVAL; static const int div_f[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48}; static const int div_o[] = {1, 2, 4, 6, 8, 12, 16, 24, 32, 48}; @@ -1222,15 +1222,10 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, idx = rt5682_div_sel(rt5682, ref, div_f, ARRAY_SIZE(div_f)); - if (w->shift == RT5682_PWR_ADC_S1F_BIT) { + if (w->shift == RT5682_PWR_ADC_S1F_BIT) reg = RT5682_PLL_TRACK_3; - sft = RT5682_ADC_OSR_SFT; - mask = RT5682_ADC_OSR_MASK; - } else { + else reg = RT5682_PLL_TRACK_2; - sft = RT5682_DAC_OSR_SFT; - mask = RT5682_DAC_OSR_MASK; - } snd_soc_component_update_bits(component, reg, RT5682_FILTER_CLK_DIV_MASK, idx << RT5682_FILTER_CLK_DIV_SFT); @@ -1242,7 +1237,8 @@ static int set_filter_clk(struct snd_soc_dapm_widget *w, } snd_soc_component_update_bits(component, RT5682_ADDA_CLK_1, - mask, idx << sft); + RT5682_ADC_OSR_MASK | RT5682_DAC_OSR_MASK, + (idx << RT5682_ADC_OSR_SFT) | (idx << RT5682_DAC_OSR_SFT)); return 0; } -- GitLab From 0172d9e322035bf7bb66a7dfdd795c38d71dbba9 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:19:44 -0500 Subject: [PATCH 0070/1861] tools/memory-model: Rename some RCU relations In preparation for adding support for SRCU, rename "crit" to "rcu-rscs", rename "rscs" to "rcu-rscsi", and remove the restriction to only the outermost level of nesting. The name change is needed for disambiguating RCU read-side critical sections from SRCU read-side critical sections. Adding the "i" at the end of "rcu-rscsi" emphasizes that the relation is inverted; it links rcu_read_unlock() events to their corresponding preceding rcu_read_lock() events. The restriction to outermost nesting levels was never essential; it was included mostly to show that it could be done. Rather than add equivalent unnecessary code for SRCU lock nesting, it seemed better to remove the existing code. Signed-off-by: Alan Stern Signed-off-by: Paul E. 
McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.bell | 9 +++------ tools/memory-model/linux-kernel.cat | 10 +++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 796513362c052..353c8d68e0300 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -34,7 +34,7 @@ enum Barriers = 'wmb (*smp_wmb*) || instructions F[Barriers] (* Compute matching pairs of nested Rcu-lock and Rcu-unlock *) -let matched = let rec +let rcu-rscs = let rec unmatched-locks = Rcu-lock \ domain(matched) and unmatched-unlocks = Rcu-unlock \ range(matched) and unmatched = unmatched-locks | unmatched-unlocks @@ -46,8 +46,5 @@ let matched = let rec in matched (* Validate nesting *) -flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking -flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking - -(* Outermost level of nesting only *) -let crit = matched \ (po^-1 ; matched ; po^-1) +flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking +flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 8f23c74a96fdc..ab9de9c1234b0 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -95,7 +95,7 @@ acyclic pb as propagation * onward on the one hand and from the rcu_read_unlock() backwards on the * other hand. *) -let rscs = po ; crit^-1 ; po? +let rcu-rscsi = po ; rcu-rscs^-1 ; po? (* * The synchronize_rcu() strong fence is special in that it can order not @@ -109,10 +109,10 @@ let rcu-link = hb* ; pb* ; prop * critical sections (joined by rcu-link) acts as a generalized strong fence. *) let rec rcu-fence = gp | - (gp ; rcu-link ; rscs) | - (rscs ; rcu-link ; gp) | - (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) | - (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; gp) | + (gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; gp) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) -- GitLab From 284749b0aebbf3ab26ff92198545aea36165f6bf Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:19:58 -0500 Subject: [PATCH 0071/1861] tools/memory-model: Refactor some RCU relations In preparation for adding support for SRCU, refactor the definitions of rcu-fence, rcu-rscsi, rcu-link, and rb by moving the po and po? terms from the first two to the second two. An rcu-gp relation is added; it is equivalent to gp with the po and po? terms removed. This is necessary because for SRCU, we will have to use the loc relation to check that the terms at the start and end of each disjunct in the definition of rcu-fence refer to the same srcu_struct location. If these terms are hidden behind po and po?, there's no way to carry out this check. Signed-off-by: Alan Stern Signed-off-by: Paul E. 
McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.cat | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index ab9de9c1234b0..b8e6197f05af7 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -91,32 +91,37 @@ acyclic pb as propagation (*******) (* - * Effect of read-side critical section proceeds from the rcu_read_lock() - * onward on the one hand and from the rcu_read_unlock() backwards on the + * Effects of read-side critical sections proceed from the rcu_read_unlock() + * backwards on the one hand, and from the rcu_read_lock() forwards on the * other hand. + * + * In the definition of rcu-fence below, the po term at the left-hand side + * of each disjunct and the po? term at the right-hand end have been factored + * out. They have been moved into the definitions of rcu-link and rb. *) -let rcu-rscsi = po ; rcu-rscs^-1 ; po? +let rcu-gp = [Sync-rcu] (* Compare with gp *) +let rcu-rscsi = rcu-rscs^-1 (* * The synchronize_rcu() strong fence is special in that it can order not * one but two non-rf relations, but only in conjunction with an RCU * read-side critical section. *) -let rcu-link = hb* ; pb* ; prop +let rcu-link = po? ; hb* ; pb* ; prop ; po (* * Any sequence containing at least as many grace periods as RCU read-side * critical sections (joined by rcu-link) acts as a generalized strong fence. *) -let rec rcu-fence = gp | - (gp ; rcu-link ; rcu-rscsi) | - (rcu-rscsi ; rcu-link ; gp) | - (gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | - (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; gp) | +let rec rcu-fence = rcu-gp | + (rcu-gp ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-gp) | + (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) -let rb = prop ; rcu-fence ; hb* ; pb* +let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb* irreflexive rb as rcu -- GitLab From a3f600d92da564ad35f237c8aeab268ca49377cc Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 15 Nov 2018 11:20:37 -0500 Subject: [PATCH 0072/1861] tools/memory-model: Add SRCU support Add support for SRCU. Herd creates srcu events and linux-kernel.def associates them with three possible annotations (srcu-lock, srcu-unlock, and sync-srcu) corresponding to the API routines srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu(). The linux-kernel.bell file now declares the annotations and determines matching lock/unlock pairs delimiting SRCU read-side critical sections, and it also checks for synchronize_srcu() calls inside an RCU critical section (which would generate a "sleeping in atomic context" error in real kernel code). The linux-kernel.cat file now adds SRCU-induced ordering, analogous to the existing RCU-induced ordering, to the gp and rcu-fence relations. Curiously enough, these small changes to the model's .cat code are all that is needed to describe SRCU. Portions of this patch (linux-kernel.def and the first hunk in linux-kernel.bell) were written by Luc Maranget. Signed-off-by: Alan Stern CC: Luc Maranget Signed-off-by: Paul E. 
McKenney Tested-by: Andrea Parri --- tools/memory-model/linux-kernel.bell | 25 +++++++++++++++++++++++++ tools/memory-model/linux-kernel.cat | 18 ++++++++++++++---- tools/memory-model/linux-kernel.def | 5 +++++ 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 353c8d68e0300..9c42cd9ddcb41 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -33,6 +33,12 @@ enum Barriers = 'wmb (*smp_wmb*) || 'after-unlock-lock (*smp_mb__after_unlock_lock*) instructions F[Barriers] +(* SRCU *) +enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu +instructions SRCU[SRCU] +(* All srcu events *) +let Srcu = Srcu-lock | Srcu-unlock | Sync-srcu + (* Compute matching pairs of nested Rcu-lock and Rcu-unlock *) let rcu-rscs = let rec unmatched-locks = Rcu-lock \ domain(matched) @@ -48,3 +54,22 @@ let rcu-rscs = let rec (* Validate nesting *) flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking + +(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *) +let srcu-rscs = let rec + unmatched-locks = Srcu-lock \ domain(matched) + and unmatched-unlocks = Srcu-unlock \ range(matched) + and unmatched = unmatched-locks | unmatched-unlocks + and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc + and unmatched-locks-to-unlocks = + ([unmatched-locks] ; po ; [unmatched-unlocks]) & loc + and matched = matched | (unmatched-locks-to-unlocks \ + (unmatched-po ; unmatched-po)) + in matched + +(* Validate nesting *) +flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking +flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking + +(* Check for use of synchronize_srcu() inside an RCU critical section *) +flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index b8e6197f05af7..8dcb37835b613 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -33,7 +33,7 @@ let mb = ([M] ; fencerel(Mb) ; [M]) | ([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) | ([M] ; po ; [UL] ; (co | po) ; [LKW] ; fencerel(After-unlock-lock) ; [M]) -let gp = po ; [Sync-rcu] ; po? +let gp = po ; [Sync-rcu | Sync-srcu] ; po? let strong-fence = mb | gp @@ -92,15 +92,18 @@ acyclic pb as propagation (* * Effects of read-side critical sections proceed from the rcu_read_unlock() - * backwards on the one hand, and from the rcu_read_lock() forwards on the - * other hand. + * or srcu_read_unlock() backwards on the one hand, and from the + * rcu_read_lock() or srcu_read_lock() forwards on the other hand. * * In the definition of rcu-fence below, the po term at the left-hand side * of each disjunct and the po? term at the right-hand end have been factored * out. They have been moved into the definitions of rcu-link and rb. + * This was necessary in order to apply the "& loc" tests correctly. *) let rcu-gp = [Sync-rcu] (* Compare with gp *) +let srcu-gp = [Sync-srcu] let rcu-rscsi = rcu-rscs^-1 +let srcu-rscsi = srcu-rscs^-1 (* * The synchronize_rcu() strong fence is special in that it can order not @@ -112,12 +115,19 @@ let rcu-link = po? ; hb* ; pb* ; prop ; po (* * Any sequence containing at least as many grace periods as RCU read-side * critical sections (joined by rcu-link) acts as a generalized strong fence. 
+ * Likewise for SRCU grace periods and read-side critical sections, provided + * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same + * struct srcu_struct location. *) -let rec rcu-fence = rcu-gp | +let rec rcu-fence = rcu-gp | srcu-gp | (rcu-gp ; rcu-link ; rcu-rscsi) | + ((srcu-gp ; rcu-link ; srcu-rscsi) & loc) | (rcu-rscsi ; rcu-link ; rcu-gp) | + ((srcu-rscsi ; rcu-link ; srcu-gp) & loc) | (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) | + ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) | (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) | + ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) | (rcu-fence ; rcu-link ; rcu-fence) (* rb orders instructions just as pb does *) diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index b27911cc087d4..1d6a120cde14e 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -47,6 +47,11 @@ rcu_read_unlock() { __fence{rcu-unlock}; } synchronize_rcu() { __fence{sync-rcu}; } synchronize_rcu_expedited() { __fence{sync-rcu}; } +// SRCU +srcu_read_lock(X) __srcu{srcu-lock}(X) +srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X); } +synchronize_srcu(X) { __srcu{sync-srcu}(X); } + // Atomic atomic_read(X) READ_ONCE(*X) atomic_set(X,V) { WRITE_ONCE(*X,V); } -- GitLab From ad9fd20b6dadb0cb14551477fcebe0fdf2e697dd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Nov 2018 14:26:43 -0800 Subject: [PATCH 0073/1861] tools/memory-model: Update README for addition of SRCU This commit updates the section on LKMM limitations to no longer say that SRCU is not modeled, but instead describe how LKMM's modeling of SRCU departs from the Linux-kernel implementation. TL;DR: There is no known valid use case that cares about the Linux kernel's ability to have partially overlapping SRCU read-side critical sections. Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/README | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/README b/tools/memory-model/README index 0f2c366518c68..9d7d4f23503fd 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -221,8 +221,29 @@ The Linux-kernel memory model has the following limitations: additional call_rcu() process to the site of the emulated rcu-barrier(). - e. Sleepable RCU (SRCU) is not modeled. It can be - emulated, but perhaps not simply. + e. Although sleepable RCU (SRCU) is now modeled, there + are some subtle differences between its semantics and + those in the Linux kernel. For example, the kernel + might interpret the following sequence as two partially + overlapping SRCU read-side critical sections: + + 1 r1 = srcu_read_lock(&my_srcu); + 2 do_something_1(); + 3 r2 = srcu_read_lock(&my_srcu); + 4 do_something_2(); + 5 srcu_read_unlock(&my_srcu, r1); + 6 do_something_3(); + 7 srcu_read_unlock(&my_srcu, r2); + + In contrast, LKMM will interpret this as a nested pair of + SRCU read-side critical sections, with the outer critical + section spanning lines 1-7 and the inner critical section + spanning lines 3-5. + + This difference would be more of a concern had anyone + identified a reasonable use case for partially overlapping + SRCU read-side critical sections. For more information, + please see: https://paulmck.livejournal.com/40593.html f. Reader-writer locking is not modeled. 
It can be emulated in litmus tests using atomic read-modify-write -- GitLab From 648e717586f2a832687fe44e2e0afb7a6fdea232 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 11 Dec 2018 11:38:53 -0500 Subject: [PATCH 0074/1861] tools/memory-model: Update Documentation/explanation.txt to include SRCU support The recent commit adding support for SRCU to the Linux Kernel Memory Model ended up changing the names and meanings of several relations. This patch updates the explanation.txt documentation file to reflect those changes. It also revises the statement of the RCU Guarantee to a more accurate form, and it adds a short paragraph mentioning the new support for SRCU. Signed-off-by: Alan Stern Cc: Akira Yokosawa Cc: Andrea Parri Cc: Boqun Feng Cc: Daniel Lustig Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: Nicholas Piggin Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Will Deacon Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- .../Documentation/explanation.txt | 289 +++++++++--------- 1 file changed, 152 insertions(+), 137 deletions(-) diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 35bff92cc7737..68caa9a976d0c 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model 19. AND THEN THERE WAS ALPHA 20. THE HAPPENS-BEFORE RELATION: hb 21. THE PROPAGATES-BEFORE RELATION: pb - 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb + 22. RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb 23. LOCKING 24. ODDS AND ENDS @@ -1430,8 +1430,8 @@ they execute means that it cannot have cycles. This requirement is the content of the LKMM's "propagation" axiom. -RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb ----------------------------------------------------- +RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb +------------------------------------------------------------- RCU (Read-Copy-Update) is a powerful synchronization mechanism. It rests on two concepts: grace periods and read-side critical sections. @@ -1446,17 +1446,19 @@ As far as memory models are concerned, RCU's main feature is its Grace-Period Guarantee, which states that a critical section can never span a full grace period. In more detail, the Guarantee says: - If a critical section starts before a grace period then it - must end before the grace period does. In addition, every - store that propagates to the critical section's CPU before the - end of the critical section must propagate to every CPU before - the end of the grace period. + For any critical section C and any grace period G, at least + one of the following statements must hold: - If a critical section ends after a grace period ends then it - must start after the grace period does. In addition, every - store that propagates to the grace period's CPU before the - start of the grace period must propagate to every CPU before - the start of the critical section. +(1) C ends before G does, and in addition, every store that + propagates to C's CPU before the end of C must propagate to + every CPU before G ends. + +(2) G starts before C does, and in addition, every store that + propagates to G's CPU before the start of G must propagate + to every CPU before C starts. + +In particular, it is not possible for a critical section to both start +before and end after a grace period. 
Here is a simple example of RCU in action: @@ -1483,10 +1485,11 @@ The Grace Period Guarantee tells us that when this code runs, it will never end with r1 = 1 and r2 = 0. The reasoning is as follows. r1 = 1 means that P0's store to x propagated to P1 before P1 called synchronize_rcu(), so P0's critical section must have started before -P1's grace period. On the other hand, r2 = 0 means that P0's store to -y, which occurs before the end of the critical section, did not -propagate to P1 before the end of the grace period, violating the -Guarantee. +P1's grace period, contrary to part (2) of the Guarantee. On the +other hand, r2 = 0 means that P0's store to y, which occurs before the +end of the critical section, did not propagate to P1 before the end of +the grace period, contrary to part (1). Together the results violate +the Guarantee. In the kernel's implementations of RCU, the requirements for stores to propagate to every CPU are fulfilled by placing strong fences at @@ -1504,11 +1507,11 @@ before" or "ends after" a grace period? Some aspects of the meaning are pretty obvious, as in the example above, but the details aren't entirely clear. The LKMM formalizes this notion by means of the rcu-link relation. rcu-link encompasses a very general notion of -"before": Among other things, X ->rcu-link Z includes cases where X -happens-before or is equal to some event Y which is equal to or comes -before Z in the coherence order. When Y = Z this says that X ->rfe Z -implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z -and X ->co Z each imply X ->rcu-link Z. +"before": If E and F are RCU fence events (i.e., rcu_read_lock(), +rcu_read_unlock(), or synchronize_rcu()) then among other things, +E ->rcu-link F includes cases where E is po-before some memory-access +event X, F is po-after some memory-access event Y, and we have any of +X ->rfe Y, X ->co Y, or X ->fr Y. The formal definition of the rcu-link relation is more than a little obscure, and we won't give it here. It is closely related to the pb @@ -1516,171 +1519,173 @@ relation, and the details don't matter unless you want to comb through a somewhat lengthy formal proof. Pretty much all you need to know about rcu-link is the information in the preceding paragraph. -The LKMM also defines the gp and rscs relations. They bring grace -periods and read-side critical sections into the picture, in the +The LKMM also defines the rcu-gp and rcu-rscsi relations. They bring +grace periods and read-side critical sections into the picture, in the following way: - E ->gp F means there is a synchronize_rcu() fence event S such - that E ->po S and either S ->po F or S = F. In simple terms, - there is a grace period po-between E and F. + E ->rcu-gp F means that E and F are in fact the same event, + and that event is a synchronize_rcu() fence (i.e., a grace + period). - E ->rscs F means there is a critical section delimited by an - rcu_read_lock() fence L and an rcu_read_unlock() fence U, such - that E ->po U and either L ->po F or L = F. You can think of - this as saying that E and F are in the same critical section - (in fact, it also allows E to be po-before the start of the - critical section and F to be po-after the end). + E ->rcu-rscsi F means that E and F are the rcu_read_unlock() + and rcu_read_lock() fence events delimiting some read-side + critical section. (The 'i' at the end of the name emphasizes + that this relation is "inverted": It links the end of the + critical section to the start.) 
If we think of the rcu-link relation as standing for an extended -"before", then X ->gp Y ->rcu-link Z says that X executes before a -grace period which ends before Z executes. (In fact it covers more -than this, because it also includes cases where X executes before a -grace period and some store propagates to Z's CPU before Z executes -but doesn't propagate to some other CPU until after the grace period -ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or -before the start of) a critical section which starts before Z -executes. - -The LKMM goes on to define the rcu-fence relation as a sequence of gp -and rscs links separated by rcu-link links, in which the number of gp -links is >= the number of rscs links. For example: +"before", then X ->rcu-gp Y ->rcu-link Z roughly says that X is a +grace period which ends before Z begins. (In fact it covers more than +this, because it also includes cases where some store propagates to +Z's CPU before Z begins but doesn't propagate to some other CPU until +after X ends.) Similarly, X ->rcu-rscsi Y ->rcu-link Z says that X is +the end of a critical section which starts before Z begins. + +The LKMM goes on to define the rcu-fence relation as a sequence of +rcu-gp and rcu-rscsi links separated by rcu-link links, in which the +number of rcu-gp links is >= the number of rcu-rscsi links. For +example: - X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + X ->rcu-gp Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V would imply that X ->rcu-fence V, because this sequence contains two -gp links and only one rscs link. (It also implies that X ->rcu-fence T -and Z ->rcu-fence V.) On the other hand: +rcu-gp links and one rcu-rscsi link. (It also implies that +X ->rcu-fence T and Z ->rcu-fence V.) On the other hand: - X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V + X ->rcu-rscsi Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V does not imply X ->rcu-fence V, because the sequence contains only -one gp link but two rscs links. +one rcu-gp link but two rcu-rscsi links. The rcu-fence relation is important because the Grace Period Guarantee means that rcu-fence acts kind of like a strong fence. In particular, -if W is a write and we have W ->rcu-fence Z, the Guarantee says that W -will propagate to every CPU before Z executes. +E ->rcu-fence F implies not only that E begins before F ends, but also +that any write po-before E will propagate to every CPU before any +instruction po-after F can execute. (However, it does not imply that +E must execute before F; in fact, each synchronize_rcu() fence event +is linked to itself by rcu-fence as a degenerate case.) To prove this in full generality requires some intellectual effort. We'll consider just a very simple case: - W ->gp X ->rcu-link Y ->rscs Z. + G ->rcu-gp W ->rcu-link Z ->rcu-rscsi F. -This formula means that there is a grace period G and a critical -section C such that: +This formula means that G and W are the same event (a grace period), +and there are events X, Y and a read-side critical section C such that: - 1. W is po-before G; + 1. G = W is po-before or equal to X; - 2. X is equal to or po-after G; + 2. X comes "before" Y in some sense (including rfe, co and fr); - 3. X comes "before" Y in some sense; + 3. Y is po-before Z; - 4. Y is po-before the end of C; + 4. Z is the rcu_read_unlock() event marking the end of C; - 5. Z is equal to or po-after the start of C. + 5. F is the rcu_read_lock() event marking the start of C. 
-From 2 - 4 we deduce that the grace period G ends before the critical -section C. Then the second part of the Grace Period Guarantee says -not only that G starts before C does, but also that W (which executes -on G's CPU before G starts) must propagate to every CPU before C -starts. In particular, W propagates to every CPU before Z executes -(or finishes executing, in the case where Z is equal to the -rcu_read_lock() fence event which starts C.) This sort of reasoning -can be expanded to handle all the situations covered by rcu-fence. +From 1 - 4 we deduce that the grace period G ends before the critical +section C. Then part (2) of the Grace Period Guarantee says not only +that G starts before C does, but also that any write which executes on +G's CPU before G starts must propagate to every CPU before C starts. +In particular, the write propagates to every CPU before F finishes +executing and hence before any instruction po-after F can execute. +This sort of reasoning can be extended to handle all the situations +covered by rcu-fence. Finally, the LKMM defines the RCU-before (rb) relation in terms of rcu-fence. This is done in essentially the same way as the pb relation was defined in terms of strong-fence. We will omit the -details; the end result is that E ->rb F implies E must execute before -F, just as E ->pb F does (and for much the same reasons). +details; the end result is that E ->rb F implies E must execute +before F, just as E ->pb F does (and for much the same reasons). Putting this all together, the LKMM expresses the Grace Period Guarantee by requiring that the rb relation does not contain a cycle. -Equivalently, this "rcu" axiom requires that there are no events E and -F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the -axiom requires that there are no cycles consisting of gp and rscs -alternating with rcu-link, where the number of gp links is >= the -number of rscs links. +Equivalently, this "rcu" axiom requires that there are no events E +and F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, +the axiom requires that there are no cycles consisting of rcu-gp and +rcu-rscsi alternating with rcu-link, where the number of rcu-gp links +is >= the number of rcu-rscsi links. Justifying the axiom isn't easy, but it is in fact a valid formalization of the Grace Period Guarantee. We won't attempt to go through the detailed argument, but the following analysis gives a -taste of what is involved. Suppose we have a violation of the first -part of the Guarantee: A critical section starts before a grace -period, and some store propagates to the critical section's CPU before -the end of the critical section but doesn't propagate to some other -CPU until after the end of the grace period. +taste of what is involved. Suppose both parts of the Guarantee are +violated: A critical section starts before a grace period, and some +store propagates to the critical section's CPU before the end of the +critical section but doesn't propagate to some other CPU until after +the end of the grace period. Putting symbols to these ideas, let L and U be the rcu_read_lock() and rcu_read_unlock() fence events delimiting the critical section in question, and let S be the synchronize_rcu() fence event for the grace period. 
Saying that the critical section starts before S means there -are events E and F where E is po-after L (which marks the start of the -critical section), E is "before" F in the sense of the rcu-link -relation, and F is po-before the grace period S: +are events Q and R where Q is po-after L (which marks the start of the +critical section), Q is "before" R in the sense used by the rcu-link +relation, and R is po-before the grace period S. Thus we have: - L ->po E ->rcu-link F ->po S. + L ->rcu-link S. -Let W be the store mentioned above, let Z come before the end of the +Let W be the store mentioned above, let Y come before the end of the critical section and witness that W propagates to the critical -section's CPU by reading from W, and let Y on some arbitrary CPU be a -witness that W has not propagated to that CPU, where Y happens after +section's CPU by reading from W, and let Z on some arbitrary CPU be a +witness that W has not propagated to that CPU, where Z happens after some event X which is po-after S. Symbolically, this amounts to: - S ->po X ->hb* Y ->fr W ->rf Z ->po U. + S ->po X ->hb* Z ->fr W ->rf Y ->po U. -The fr link from Y to W indicates that W has not propagated to Y's CPU -at the time that Y executes. From this, it can be shown (see the -discussion of the rcu-link relation earlier) that X and Z are related -by rcu-link, yielding: +The fr link from Z to W indicates that W has not propagated to Z's CPU +at the time that Z executes. From this, it can be shown (see the +discussion of the rcu-link relation earlier) that S and U are related +by rcu-link: - S ->po X ->rcu-link Z ->po U. + S ->rcu-link U. -The formulas say that S is po-between F and X, hence F ->gp X. They -also say that Z comes before the end of the critical section and E -comes after its start, hence Z ->rscs E. From all this we obtain: +Since S is a grace period we have S ->rcu-gp S, and since L and U are +the start and end of the critical section C we have U ->rcu-rscsi L. +From this we obtain: - F ->gp X ->rcu-link Z ->rscs E ->rcu-link F, + S ->rcu-gp S ->rcu-link U ->rcu-rscsi L ->rcu-link S, a forbidden cycle. Thus the "rcu" axiom rules out this violation of the Grace Period Guarantee. For something a little more down-to-earth, let's see how the axiom works out in practice. Consider the RCU code example from above, this -time with statement labels added to the memory access instructions: +time with statement labels added: int x, y; P0() { - rcu_read_lock(); - W: WRITE_ONCE(x, 1); - X: WRITE_ONCE(y, 1); - rcu_read_unlock(); + L: rcu_read_lock(); + X: WRITE_ONCE(x, 1); + Y: WRITE_ONCE(y, 1); + U: rcu_read_unlock(); } P1() { int r1, r2; - Y: r1 = READ_ONCE(x); - synchronize_rcu(); - Z: r2 = READ_ONCE(y); + Z: r1 = READ_ONCE(x); + S: synchronize_rcu(); + W: r2 = READ_ONCE(y); } -If r2 = 0 at the end then P0's store at X overwrites the value that -P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X. -In addition, there is a synchronize_rcu() between Y and Z, so therefore -we have Y ->gp Z. +If r2 = 0 at the end then P0's store at Y overwrites the value that +P1's load at W reads from, so we have W ->fre Y. Since S ->po W and +also Y ->po U, we get S ->rcu-link U. In addition, S ->rcu-gp S +because S is a grace period. -If r1 = 1 at the end then P1's load at Y reads from P0's store at W, -so we have W ->rcu-link Y. In addition, W and X are in the same critical -section, so therefore we have X ->rscs W. 
+If r1 = 1 at the end then P1's load at Z reads from P0's store at X, +so we have X ->rfe Z. Together with L ->po X and Z ->po S, this +yields L ->rcu-link S. And since L and U are the start and end of a +critical section, we have U ->rcu-rscsi L. -Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle, -violating the "rcu" axiom. Hence the outcome is not allowed by the -LKMM, as we would expect. +Then U ->rcu-rscsi L ->rcu-link S ->rcu-gp S ->rcu-link U is a +forbidden cycle, violating the "rcu" axiom. Hence the outcome is not +allowed by the LKMM, as we would expect. For contrast, let's see what can happen in a more complicated example: @@ -1690,51 +1695,52 @@ For contrast, let's see what can happen in a more complicated example: { int r0; - rcu_read_lock(); - W: r0 = READ_ONCE(x); - X: WRITE_ONCE(y, 1); - rcu_read_unlock(); + L0: rcu_read_lock(); + r0 = READ_ONCE(x); + WRITE_ONCE(y, 1); + U0: rcu_read_unlock(); } P1() { int r1; - Y: r1 = READ_ONCE(y); - synchronize_rcu(); - Z: WRITE_ONCE(z, 1); + r1 = READ_ONCE(y); + S1: synchronize_rcu(); + WRITE_ONCE(z, 1); } P2() { int r2; - rcu_read_lock(); - U: r2 = READ_ONCE(z); - V: WRITE_ONCE(x, 1); - rcu_read_unlock(); + L2: rcu_read_lock(); + r2 = READ_ONCE(z); + WRITE_ONCE(x, 1); + U2: rcu_read_unlock(); } If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows -that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W. -However this cycle is not forbidden, because the sequence of relations -contains fewer instances of gp (one) than of rscs (two). Consequently -the outcome is allowed by the LKMM. The following instruction timing -diagram shows how it might actually occur: +that U0 ->rcu-rscsi L0 ->rcu-link S1 ->rcu-gp S1 ->rcu-link U2 ->rcu-rscsi +L2 ->rcu-link U0. However this cycle is not forbidden, because the +sequence of relations contains fewer instances of rcu-gp (one) than of +rcu-rscsi (two). Consequently the outcome is allowed by the LKMM. +The following instruction timing diagram shows how it might actually +occur: P0 P1 P2 -------------------- -------------------- -------------------- rcu_read_lock() -X: WRITE_ONCE(y, 1) - Y: r1 = READ_ONCE(y) +WRITE_ONCE(y, 1) + r1 = READ_ONCE(y) synchronize_rcu() starts . rcu_read_lock() - . V: WRITE_ONCE(x, 1) -W: r0 = READ_ONCE(x) . + . WRITE_ONCE(x, 1) +r0 = READ_ONCE(x) . rcu_read_unlock() . synchronize_rcu() ends - Z: WRITE_ONCE(z, 1) - U: r2 = READ_ONCE(z) + WRITE_ONCE(z, 1) + r2 = READ_ONCE(z) rcu_read_unlock() This requires P0 and P2 to execute their loads and stores out of @@ -1744,6 +1750,15 @@ section in P0 both starts before P1's grace period does and ends before it does, and the critical section in P2 both starts after P1's grace period does and ends after it does. +Addendum: The LKMM now supports SRCU (Sleepable Read-Copy-Update) in +addition to normal RCU. The ideas involved are much the same as +above, with new relations srcu-gp and srcu-rscsi added to represent +SRCU grace periods and read-side critical sections. There is a +restriction on the srcu-gp and srcu-rscsi links that can appear in an +rcu-fence sequence (the srcu-rscsi links must be paired with srcu-gp +links having the same SRCU domain with proper nesting); the details +are relatively unimportant. 
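[Editor's aside, not part of the patch above: the SRCU addendum can be made concrete with a minimal litmus-test sketch in the C-flavored syntax herd7 accepts; the test name and exists clause are illustrative, mirroring the document's earlier RCU example.]

C SRCU-grace-period-sketch

{}

P0(int *x, int *y, struct srcu_struct *s)
{
	int r1;

	r1 = srcu_read_lock(s);
	WRITE_ONCE(*x, 1);
	WRITE_ONCE(*y, 1);
	srcu_read_unlock(s, r1);
}

P1(int *x, int *y, struct srcu_struct *s)
{
	int r1;
	int r2;

	r1 = READ_ONCE(*x);
	synchronize_srcu(s);
	r2 = READ_ONCE(*y);
}

exists (1:r1=1 /\ 1:r2=0)

Just as in the RCU example, LKMM forbids this outcome: all three SRCU calls refer to the same srcu_struct s, so if P1's read of x sees the critical section's store, the critical section's store to y must propagate to every CPU before the SRCU grace period ends, making r2 = 1.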
+ LOCKING ------- -- GitLab From 9393998e9ee094f99d18783cc85c489e20f0e0e7 Mon Sep 17 00:00:00 2001 From: Luc Maranget Date: Thu, 27 Dec 2018 16:27:12 +0100 Subject: [PATCH 0075/1861] tools/memory-model: Dynamically check SRCU lock-to-unlock matching This commit checks that the return value of srcu_read_lock() is passed to the matching srcu_read_unlock(), where "matching" is determined by nesting. This check operates as follows: 1. srcu_read_lock() creates an integer token, which is stored into the generated events. 2. srcu_read_unlock() records its second (token) argument into the generated event. 3. A new herd primitive 'different-values' filters out pairs of events with identical values from the relation passed as its argument. 4. The bell file applies the above primitive to the (srcu) read-side-critical-section relation 'srcu-rscs' and flags non-empty results. BEWARE: Works only with herd version 7.51+6 and onwards. Signed-off-by: Luc Maranget Signed-off-by: Paul E. McKenney [ paulmck: Apply Andrea Parri's off-list feedback. ] Acked-by: Alan Stern --- tools/memory-model/linux-kernel.bell | 3 +++ tools/memory-model/linux-kernel.cat | 2 ++ tools/memory-model/linux-kernel.def | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 9c42cd9ddcb41..def9131d3d8e3 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -73,3 +73,6 @@ flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking (* Check for use of synchronize_srcu() inside an RCU critical section *) flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep + +(* Validate SRCU dynamic match *) +flag ~empty different-values(srcu-rscs) as srcu-bad-nesting diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 8dcb37835b613..95bf45f1215fc 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ (* + * Requires herd version 7.51+6 or higher. + * * Copyright (C) 2015 Jade Alglave , * Copyright (C) 2016 Luc Maranget for Inria * Copyright (C) 2017 Alan Stern , diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 1d6a120cde14e..0c3f0ef486f46 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -49,7 +49,7 @@ synchronize_rcu_expedited() { __fence{sync-rcu}; } // SRCU srcu_read_lock(X) __srcu{srcu-lock}(X) -srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X); } +srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); } synchronize_srcu(X) { __srcu{sync-srcu}(X); } // Atomic -- GitLab From 034fb712a620c84efa78e2889845d5dea95f688f Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 31 Jan 2019 08:08:40 -0800 Subject: [PATCH 0076/1861] tools/memory-model: Avoid duplicating herdtools versions Currently, herdtools version information appears no fewer than three times in the LKMM source, which is difficult to maintain. This commit therefore places the required version in one place, namely the tools/memory-model/README file. Signed-off-by: Andrea Parri Signed-off-by: Paul E. 
McKenney Acked-by: Alan Stern --- tools/memory-model/README | 8 ++++++-- tools/memory-model/linux-kernel.cat | 2 -- tools/memory-model/lock.cat | 3 --- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/memory-model/README b/tools/memory-model/README index 9d7d4f23503fd..2b87f3971548c 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -20,13 +20,17 @@ that litmus test to be exercised within the Linux kernel. REQUIREMENTS ============ -Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded -separately: +Version 7.52 or higher of the "herd7" and "klitmus7" tools must be +downloaded separately: https://github.com/herd/herdtools7 See "herdtools7/INSTALL.md" for installation instructions. +Note that although these tools usually provide backwards compatibility, +this is not absolutely guaranteed. Therefore, if a later version does +not work, please try using the exact version called out above. + ================== BASIC USAGE: HERD7 diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 95bf45f1215fc..8dcb37835b613 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ (* - * Requires herd version 7.51+6 or higher. - * * Copyright (C) 2015 Jade Alglave , * Copyright (C) 2016 Luc Maranget for Inria * Copyright (C) 2017 Alan Stern , diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 305ded17e7411..a059d1a6d8a29 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -6,9 +6,6 @@ (* * Generate coherence orders and handle lock operations - * - * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48. - * spin_is_locked() is functional from herd7 version 7.49. *) include "cross.cat" -- GitLab From 487ecc460732cab178897e81a7f5ef6183eff143 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 25 Jan 2019 06:55:47 +0900 Subject: [PATCH 0077/1861] sched/Documentation/kokr: Update Korean translation to update wake_up() & co. memory-barrier guarantees Translate this commit to Korean: 7696f9910a9a ("sched/Documentation: Update wake_up() & co. memory-barrier guarantees") Signed-off-by: SeongJae Park Reviewed-by: Yunjae Lee Signed-off-by: Paul E. McKenney --- .../translations/ko_KR/memory-barriers.txt | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 7f01fb1c10842..4a6cf4d58cbf4 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -2146,33 +2146,40 @@ set_current_state() 는 다음의 것들로 감싸질 수도 있습니다: event_indicated = 1; wake_up_process(event_daemon); -wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 그것들이 뭔가를 -깨운다면요. 이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를 -알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에 -위치하게 됩니다. +wake_up() 이 무언가를 깨우게 되면, 이 함수는 범용 메모리 배리어를 수행합니다. +이 함수가 아무것도 깨우지 않는다면 메모리 배리어는 수행될 수도, 수행되지 않을 +수도 있습니다; 이 경우에 메모리 배리어를 수행할 거라 오해해선 안됩니다. 이 +배리어는 태스크 상태가 접근되기 전에 수행되는데, 자세히 말하면 이 이벤트를 +알리기 위한 STORE 와 TASK_RUNNING 으로 상태를 쓰는 STORE 사이에 수행됩니다: - CPU 1 CPU 2 + CPU 1 (Sleeper) CPU 2 (Waker) =============================== =============================== set_current_state(); STORE event_indicated smp_store_mb(); wake_up(); - STORE current->state <쓰기 배리어> - <범용 배리어> STORE current->state - LOAD event_indicated + STORE current->state ... 
+ <범용 배리어> <범용 배리어> + LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL) + STORE task->state -한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만 -실행됩니다. 이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정 -하에 아래의 이벤트 시퀀스를 생각해 봅시다: +여기서 "task" 는 깨어나지는 쓰레드이고 CPU 1 의 "current" 와 같습니다. + +반복하지만, wake_up() 이 무언가를 정말 깨운다면 범용 메모리 배리어가 수행될 +것이 보장되지만, 그렇지 않다면 그런 보장이 없습니다. 이걸 이해하기 위해, X 와 +Y 는 모두 0 으로 초기화 되어 있다는 가정 하에 아래의 이벤트 시퀀스를 생각해 +봅시다: CPU 1 CPU 2 =============================== =============================== - X = 1; STORE event_indicated + X = 1; Y = 1; smp_mb(); wake_up(); - Y = 1; wait_event(wq, Y == 1); - wake_up(); load from Y sees 1, no memory barrier - load from X might see 0 + LOAD Y LOAD X + +정말로 깨우기가 행해졌다면, 두 로드 중 (최소한) 하나는 1 을 보게 됩니다. +반면에, 실제 깨우기가 행해지지 않았다면, 두 로드 모두 0을 볼 수도 있습니다. -위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을 -본다고 보장될 수 있을 겁니다. +wake_up_process() 는 항상 범용 메모리 배리어를 수행합니다. 이 배리어 역시 +태스크 상태가 접근되기 전에 수행됩니다. 특히, 앞의 예제 코드에서 wake_up() 이 +wake_up_process() 로 대체된다면 두 로드 중 하나는 1을 볼 것이 보장됩니다. 사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다: @@ -2192,6 +2199,8 @@ wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 wake_up_poll(); wake_up_process(); +메모리 순서규칙 관점에서, 이 함수들은 모두 wake_up() 과 같거나 보다 강한 순서 +보장을 제공합니다. [!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에 이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는 -- GitLab From db467147f131b8ab799066b9779addb8603d3baf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 25 Jan 2019 06:55:48 +0900 Subject: [PATCH 0078/1861] locking/memory-barriers/kokr: Update Korean translation to replace smp_cond_acquire() with smp_cond_load_acquire() Transalte this commit to Korean: 2f359c7ea554 ("locking/memory-barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()") Signed-off-by: SeongJae Park Reviewed-by: Yunjae Lee Signed-off-by: Paul E. McKenney --- Documentation/translations/ko_KR/memory-barriers.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 4a6cf4d58cbf4..db0b9d8619f1a 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -493,10 +493,8 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니 이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다. ACQUIRE 오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에 일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다. - LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도 - ACQUIRE 오퍼레이션에 포함됩니다. smp_cond_acquire() 오퍼레이션은 컨트롤 - 의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을 - 충족시킵니다. + LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_load_acquire() 오퍼레이션도 + ACQUIRE 오퍼레이션에 포함됩니다. ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에 수행된 것처럼 보일 수 있습니다. -- GitLab From f1887143f5984f23d2360f2efed6ef481bb41117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 11 Feb 2019 18:09:43 +0100 Subject: [PATCH 0079/1861] Documentation/atomic_t: Clarify signed vs unsigned Clarify the whole signed vs unsigned issue for atomic_t. There has been enough confusion on this topic to warrant a few explicit words I feel. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Acked-by: Boqun Feng Signed-off-by: Paul E. 
McKenney --- Documentation/atomic_t.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt index 913396ac58243..dca3fb0554db4 100644 --- a/Documentation/atomic_t.txt +++ b/Documentation/atomic_t.txt @@ -56,6 +56,23 @@ Barriers: smp_mb__{before,after}_atomic() +TYPES (signed vs unsigned) +----- + +While atomic_t, atomic_long_t and atomic64_t use int, long and s64 +respectively (for hysterical raisins), the kernel uses -fno-strict-overflow +(which implies -fwrapv) and defines signed overflow to behave like +2s-complement. + +Therefore, an explicitly unsigned variant of the atomic ops is strictly +unnecessary and we can simply cast, there is no UB. + +There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for +signed types. + +With this we also conform to the C/C++ _Atomic behaviour and things like +P1236R1. + SEMANTICS --------- -- GitLab From 00206a69ee32f03e6f40837684dcbe475ea02266 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 18 Mar 2019 02:32:36 +0100 Subject: [PATCH 0080/1861] percpu: stop printing kernel addresses Since commit ad67b74d2469d9b8 ("printk: hash addresses printed with %p"), at boot "____ptrval____" is printed instead of actual addresses: percpu: Embedded 38 pages/cpu @(____ptrval____) s124376 r0 d31272 u524288 Instead of changing the print to "%px", and leaking kernel addresses, just remove the print completely, cfr. e.g. commit 071929dbdd865f77 ("arm64: Stop printing the virtual memory layout"). Signed-off-by: Matteo Croce Signed-off-by: Dennis Zhou --- mm/percpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 2e6fc8d552c96..68dd2e7e73b5f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2567,8 +2567,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, ai->groups[group].base_offset = areas[group] - base; } - pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n", + PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); @@ -2692,8 +2692,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, ai->static_size, + pr_info("%d %s pages/cpu s%zu r%zu d%zu\n", + unit_pages, psize_str, ai->static_size, ai->reserved_size, ai->dyn_size); rc = pcpu_setup_first_chunk(ai, vm.addr); -- GitLab From 45b06682113b102bdf38678311da93a689b0b78d Mon Sep 17 00:00:00 2001 From: Matthias Wieloch Date: Mon, 18 Mar 2019 11:50:45 +0100 Subject: [PATCH 0081/1861] clk: at91: fix programmable clock for sama5d2 The prescaler formula of the programmable clock has changed for sama5d2. Update the driver accordingly. 
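[Editor's note, not part of the commit: the change is from a power-of-two prescaler to a direct divider. A hedged sketch of the two rate formulas, keyed off the is_pres_direct flag this patch introduces (the helper name is made up for illustration):]

	/*
	 * Illustrative only: how the PCK output rate follows from the
	 * PRES field under the two layouts this patch distinguishes.
	 */
	static unsigned long pck_rate(unsigned long parent_rate,
				      unsigned int pres, bool is_pres_direct)
	{
		if (is_pres_direct)			/* sama5d2 */
			return parent_rate / (pres + 1);
		return parent_rate >> pres;		/* earlier SoCs */
	}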
Fixes: a2038077de9a ("clk: at91: add sama5d2 PMC driver") Cc: # v4.20+ Signed-off-by: Nicolas Ferre [nicolas.ferre@microchip.com: adapt the prescaler range, fix clk_programmable_recalc_rate, split patch] Signed-off-by: Matthias Wieloch Signed-off-by: Alexandre Belloni Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-programmable.c | 57 ++++++++++++++++++++++------- drivers/clk/at91/pmc.h | 2 + drivers/clk/at91/sama5d2.c | 10 ++++- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 89d6f3736dbf6..f8edbb65eda35 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -20,8 +20,7 @@ #define PROG_ID_MAX 7 #define PROG_STATUS_MASK(id) (1 << ((id) + 8)) -#define PROG_PRES_MASK 0x7 -#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & PROG_PRES_MASK) +#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & layout->pres_mask) #define PROG_MAX_RM9200_CSS 3 struct clk_programmable { @@ -37,20 +36,29 @@ static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct clk_programmable *prog = to_clk_programmable(hw); + const struct clk_programmable_layout *layout = prog->layout; unsigned int pckr; + unsigned long rate; regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr); - return parent_rate >> PROG_PRES(prog->layout, pckr); + if (layout->is_pres_direct) + rate = parent_rate / (PROG_PRES(layout, pckr) + 1); + else + rate = parent_rate >> PROG_PRES(layout, pckr); + + return rate; } static int clk_programmable_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { + struct clk_programmable *prog = to_clk_programmable(hw); + const struct clk_programmable_layout *layout = prog->layout; struct clk_hw *parent; long best_rate = -EINVAL; unsigned long parent_rate; - unsigned long tmp_rate; + unsigned long tmp_rate = 0; int shift; int i; @@ -60,10 +68,18 @@ static int clk_programmable_determine_rate(struct clk_hw *hw, continue; parent_rate = clk_hw_get_rate(parent); - for (shift = 0; shift < PROG_PRES_MASK; shift++) { - tmp_rate = parent_rate >> shift; - if (tmp_rate <= req->rate) - break; + if (layout->is_pres_direct) { + for (shift = 0; shift <= layout->pres_mask; shift++) { + tmp_rate = parent_rate / (shift + 1); + if (tmp_rate <= req->rate) + break; + } + } else { + for (shift = 0; shift < layout->pres_mask; shift++) { + tmp_rate = parent_rate >> shift; + if (tmp_rate <= req->rate) + break; + } } if (tmp_rate > req->rate) @@ -137,16 +153,23 @@ static int clk_programmable_set_rate(struct clk_hw *hw, unsigned long rate, if (!div) return -EINVAL; - shift = fls(div) - 1; + if (layout->is_pres_direct) { + shift = div - 1; - if (div != (1 << shift)) - return -EINVAL; + if (shift > layout->pres_mask) + return -EINVAL; + } else { + shift = fls(div) - 1; - if (shift >= PROG_PRES_MASK) - return -EINVAL; + if (div != (1 << shift)) + return -EINVAL; + + if (shift >= layout->pres_mask) + return -EINVAL; + } regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), - PROG_PRES_MASK << layout->pres_shift, + layout->pres_mask << layout->pres_shift, shift << layout->pres_shift); return 0; @@ -202,19 +225,25 @@ at91_clk_register_programmable(struct regmap *regmap, } const struct clk_programmable_layout at91rm9200_programmable_layout = { + .pres_mask = 0x7, .pres_shift = 2, .css_mask = 0x3, .have_slck_mck = 0, + .is_pres_direct = 0, }; const struct clk_programmable_layout at91sam9g45_programmable_layout = { + 
.pres_mask = 0x7, .pres_shift = 2, .css_mask = 0x3, .have_slck_mck = 1, .is_pres_direct = 0, }; const struct clk_programmable_layout at91sam9x5_programmable_layout = { + .pres_mask = 0x7, .pres_shift = 4, .css_mask = 0x7, .have_slck_mck = 0, + .is_pres_direct = 0, }; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 672a79bda88c9..a0e5ce9c9b9ea 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -71,9 +71,11 @@ struct clk_pll_characteristics { }; struct clk_programmable_layout { + u8 pres_mask; u8 pres_shift; u8 css_mask; u8 have_slck_mck; + u8 is_pres_direct; }; extern const struct clk_programmable_layout at91rm9200_programmable_layout; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index 1f70cb164b06f..81943fac4537e 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -125,6 +125,14 @@ static const struct { .pll = true }, }; +static const struct clk_programmable_layout sama5d2_programmable_layout = { + .pres_mask = 0xff, + .pres_shift = 4, + .css_mask = 0x7, + .have_slck_mck = 0, + .is_pres_direct = 1, +}; + static void __init sama5d2_pmc_setup(struct device_node *np) { struct clk_range range = CLK_RANGE(0, 0); @@ -249,7 +257,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 6, i, - &at91sam9x5_programmable_layout); + &sama5d2_programmable_layout); if (IS_ERR(hw)) goto err_free; } -- GitLab From 3df64d7b0a4f70f1797f23cfd4cca5c4d48131fe Mon Sep 17 00:00:00 2001 From: CK Hu Date: Mon, 14 Jan 2019 17:36:48 +0800 Subject: [PATCH 0082/1861] drm/mediatek: Implement gem prime vmap/vunmap function For applications which need a kernel virtual address, such as fbcon, implement these functions to map/unmap the kernel virtual address of a prime buffer. 
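[Editor's note, not part of the commit: these hooks are reached through the dma-buf layer rather than called directly. A hedged importer-side sketch using the dma-buf API of this era; dmabuf stands in for a hypothetical struct dma_buf pointer obtained by the importer:]

	/*
	 * Illustrative only: dma_buf_vmap() ends up invoking the
	 * exporter's .gem_prime_vmap hook added by this patch.
	 */
	void *vaddr = dma_buf_vmap(dmabuf);

	if (vaddr) {
		/* CPU access to the buffer contents goes here. */
		dma_buf_vunmap(dmabuf, vaddr);
	}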
Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 2 ++ drivers/gpu/drm/mediatek/mtk_drm_gem.c | 46 ++++++++++++++++++++++++++ drivers/gpu/drm/mediatek/mtk_drm_gem.h | 3 ++ 3 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index cf59ea9bccfdf..4c3375cfb68f0 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -341,6 +341,8 @@ static struct drm_driver mtk_drm_driver = { .gem_prime_get_sg_table = mtk_gem_prime_get_sg_table, .gem_prime_import_sg_table = mtk_gem_prime_import_sg_table, .gem_prime_mmap = mtk_drm_gem_mmap_buf, + .gem_prime_vmap = mtk_drm_gem_prime_vmap, + .gem_prime_vunmap = mtk_drm_gem_prime_vunmap, .fops = &mtk_drm_fops, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 259b7b0de1d22..38483e9ee0712 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -241,3 +241,49 @@ err_gem_free: kfree(mtk_gem); return ERR_PTR(ret); } + +void *mtk_drm_gem_prime_vmap(struct drm_gem_object *obj) +{ + struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj); + struct sg_table *sgt; + struct sg_page_iter iter; + unsigned int npages; + unsigned int i = 0; + + if (mtk_gem->kvaddr) + return mtk_gem->kvaddr; + + sgt = mtk_gem_prime_get_sg_table(obj); + if (IS_ERR(sgt)) + return NULL; + + npages = obj->size >> PAGE_SHIFT; + mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL); + if (!mtk_gem->pages) + goto out; + + for_each_sg_page(sgt->sgl, &iter, sgt->orig_nents, 0) { + mtk_gem->pages[i++] = sg_page_iter_page(&iter); + if (i > npages) + break; + } + mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP, + pgprot_writecombine(PAGE_KERNEL)); + +out: + kfree((void *)sgt); + + return mtk_gem->kvaddr; +} + +void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) +{ + struct mtk_drm_gem_obj *mtk_gem = to_mtk_gem_obj(obj); + + if (!mtk_gem->pages) + return; + + vunmap(vaddr); + mtk_gem->kvaddr = 0; + kfree((void *)mtk_gem->pages); +} diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h b/drivers/gpu/drm/mediatek/mtk_drm_gem.h index 534639b43a1c7..c047a7ef294fd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h @@ -37,6 +37,7 @@ struct mtk_drm_gem_obj { dma_addr_t dma_addr; unsigned long dma_attrs; struct sg_table *sg; + struct page **pages; }; #define to_mtk_gem_obj(x) container_of(x, struct mtk_drm_gem_obj, base) @@ -52,5 +53,7 @@ int mtk_drm_gem_mmap_buf(struct drm_gem_object *obj, struct sg_table *mtk_gem_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); +void *mtk_drm_gem_prime_vmap(struct drm_gem_object *obj); +void mtk_drm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); #endif -- GitLab From d6db988a44294d7a91f60ecb87914694ba106305 Mon Sep 17 00:00:00 2001 From: CK Hu Date: Mon, 14 Jan 2019 17:44:44 +0800 Subject: [PATCH 0083/1861] drm/mediatek: Add Mediatek framebuffer device For Mediatek drm driver, use fbdev emulation to create a framebuffer device. 
Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 4c3375cfb68f0..57ce4708ef1b9 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -378,6 +379,10 @@ static int mtk_drm_bind(struct device *dev) if (ret < 0) goto err_deinit; + ret = drm_fbdev_generic_setup(drm, 32); + if (ret) + DRM_ERROR("Failed to initialize fbdev: %d\n", ret); + return 0; err_deinit: -- GitLab From 6c44b15e1c9076d925d5236ddadf1318b0a25ce2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 00:24:02 -0500 Subject: [PATCH 0084/1861] HID: logitech: check the return value of create_singlethread_workqueue create_singlethread_workqueue may fail and return NULL. The fix checks if it is NULL to avoid NULL pointer dereference. Also, the fix moves the call of create_singlethread_workqueue earlier to avoid resource-release issues. Signed-off-by: Kangjie Lu Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 15ed6177a7a36..0a243247b231a 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2111,6 +2111,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) kfree(data); return -ENOMEM; } + data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); + if (!data->wq) { + kfree(data->effect_ids); + kfree(data); + return -ENOMEM; + } + data->hidpp = hidpp; data->feature_index = feature_index; data->version = version; @@ -2155,7 +2162,6 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) /* ignore boost value at response.fap.params[2] */ /* init the hardware command queue */ - data->wq = create_singlethread_workqueue("hidpp-ff-sendqueue"); atomic_set(&data->workqueue_size, 0); /* initialize with zero autocenter to get wheel in usable state */ -- GitLab From 639e5eb3c7d67e407f2a71fccd95323751398f6f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:04 +0000 Subject: [PATCH 0085/1861] ASoC: wm_adsp: Correct handling of compressed streams that restart Previously support was added to allow streams to be stopped and started again without the DSP being power cycled and this was done by clearing the buffer state in trigger start. Another supported use-case is using the DSP for a trigger event then opening the compressed stream later to receive the audio, unfortunately clearing the buffer state in trigger start destroys the data received from such a trigger. Correct this issue by moving the call to wm_adsp_buffer_clear to be in trigger stop instead. 
Fixes: 61fc060c40e6 ("ASoC: wm_adsp: Support streams which can start/stop with DSP active") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index b93fdc8d2d6fb..0de36b7eeeb22 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3571,8 +3571,6 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) if (ret < 0) break; - wm_adsp_buffer_clear(compr->buf); - /* Trigger the IRQ at one fragment of data */ ret = wm_adsp_buffer_write(compr->buf, HOST_BUFFER_FIELD(high_water_mark), @@ -3584,6 +3582,7 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } break; case SNDRV_PCM_TRIGGER_STOP: + wm_adsp_buffer_clear(compr->buf); break; default: ret = -EINVAL; -- GitLab From 48ead31ce247dc8c0b01ad99d1a97da35421493b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:05 +0000 Subject: [PATCH 0086/1861] ASoC: wm_adsp: Correct error messages in wm_adsp_buffer_get_error During recent logging improvements it seems two error messages lost their updates during patch application/rebasing. Add these back in. Fixes: 0d3fba3e7a56 ("ASoC: wm_adsp: Improve logging messages") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 0de36b7eeeb22..b3348a213c185 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3535,11 +3535,11 @@ static int wm_adsp_buffer_get_error(struct wm_adsp_compr_buf *buf) ret = wm_adsp_buffer_read(buf, HOST_BUFFER_FIELD(error), &buf->error); if (ret < 0) { - adsp_err(buf->dsp, "Failed to check buffer error: %d\n", ret); + compr_err(buf, "Failed to check buffer error: %d\n", ret); return ret; } if (buf->error != 0) { - adsp_err(buf->dsp, "Buffer error occurred: %d\n", buf->error); + compr_err(buf, "Buffer error occurred: %d\n", buf->error); return -EIO; } -- GitLab From a2225a6d155fcb247fe4c6d87f7c91807462966d Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:06 +0000 Subject: [PATCH 0087/1861] ASoC: wm_adsp: Add locking to wm_adsp2_bus_error Best to lock across handling the bus error to ensure the DSP doesn't change power state as we are reading the status registers. 
Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index b3348a213c185..c19a8a041d4dd 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3922,11 +3922,13 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) struct regmap *regmap = dsp->regmap; int ret = 0; + mutex_lock(&dsp->pwr_lock); + ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val); if (ret) { adsp_err(dsp, "Failed to read Region Lock Ctrl register: %d\n", ret); - return IRQ_HANDLED; + goto error; } if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { @@ -3945,7 +3947,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) adsp_err(dsp, "Failed to read Bus Err Addr register: %d\n", ret); - return IRQ_HANDLED; + goto error; } adsp_err(dsp, "bus error address = 0x%x\n", @@ -3958,7 +3960,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) adsp_err(dsp, "Failed to read Pmem Xmem Err Addr register: %d\n", ret); - return IRQ_HANDLED; + goto error; } adsp_err(dsp, "xmem error address = 0x%x\n", @@ -3971,6 +3973,9 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT); +error: + mutex_unlock(&dsp->pwr_lock); + return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(wm_adsp2_bus_error); -- GitLab From a2bcbc1b9ac2f982a438081a9f1b5d823332d514 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 19 Mar 2019 11:52:07 +0000 Subject: [PATCH 0088/1861] ASoC: wm_adsp: Shutdown any compressed streams on DSP watchdog timeout If a watchdog timeout is received from the DSP it is safe to assume the DSP is not functioning anymore and as such any active compressed streams should be put into an error state. 
Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 21 +++++++++++++++++++++ sound/soc/codecs/wm_adsp.h | 1 + 2 files changed, 22 insertions(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index c19a8a041d4dd..5608ed5deccac 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -2905,6 +2905,8 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, if (wm_adsp_fw[dsp->fw].num_caps != 0) wm_adsp_buffer_free(dsp); + dsp->fatal_error = false; + mutex_unlock(&dsp->pwr_lock); adsp_dbg(dsp, "Execution stopped\n"); @@ -3000,6 +3002,9 @@ static int wm_adsp_compr_attach(struct wm_adsp_compr *compr) { struct wm_adsp_compr_buf *buf = NULL, *tmp; + if (compr->dsp->fatal_error) + return -EINVAL; + list_for_each_entry(tmp, &compr->dsp->buffer_list, list) { if (!tmp->name || !strcmp(compr->name, tmp->name)) { buf = tmp; @@ -3916,6 +3921,21 @@ int wm_adsp2_lock(struct wm_adsp *dsp, unsigned int lock_regions) } EXPORT_SYMBOL_GPL(wm_adsp2_lock); +static void wm_adsp_fatal_error(struct wm_adsp *dsp) +{ + struct wm_adsp_compr *compr; + + dsp->fatal_error = true; + + list_for_each_entry(compr, &dsp->compr_list, list) { + if (compr->stream) { + snd_compr_stop_error(compr->stream, + SNDRV_PCM_STATE_XRUN); + snd_compr_fragment_elapsed(compr->stream); + } + } +} + irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) { unsigned int val; @@ -3934,6 +3954,7 @@ irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { adsp_err(dsp, "watchdog timeout error\n"); wm_adsp_stop_watchdog(dsp); + wm_adsp_fatal_error(dsp); } if (val & (ADSP2_SLAVE_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) { diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index 59e07ad163296..8f09b4419a914 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -85,6 +85,7 @@ struct wm_adsp { bool preloaded; bool booted; bool running; + bool fatal_error; struct list_head ctl_list; -- GitLab From cef0d4948cb0a02db37ebfdc320e127c77ab1637 Mon Sep 17 00:00:00 2001 From: "He, Bo" Date: Thu, 14 Mar 2019 02:28:21 +0000 Subject: [PATCH 0089/1861] HID: debug: fix race condition with between rdesc_show() and device removal There is a race condition that could happen if hid_debug_rdesc_show() is running while hdev is in the process of going away (device removal, system suspend, etc) which could result in NULL pointer dereference: BUG: unable to handle kernel paging request at 0000000783316040 CPU: 1 PID: 1512 Comm: getevent Tainted: G U O 4.19.20-quilt-2e5dc0ac-00029-gc455a447dd55 #1 RIP: 0010:hid_dump_device+0x9b/0x160 Call Trace: hid_debug_rdesc_show+0x72/0x1d0 seq_read+0xe0/0x410 full_proxy_read+0x5f/0x90 __vfs_read+0x3a/0x170 vfs_read+0xa0/0x150 ksys_read+0x58/0xc0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x55/0x110 entry_SYSCALL_64_after_hwframe+0x49/0xbe Grab driver_input_lock to make sure the input device exists throughout the whole process of dumping the rdesc. 
[jkosina@suse.cz: update changelog a bit] Signed-off-by: he, bo Signed-off-by: "Zhang, Jun" Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index ac9fda1b5a723..1384e57182af9 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1060,10 +1060,15 @@ static int hid_debug_rdesc_show(struct seq_file *f, void *p) seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ + if (down_interruptible(&hdev->driver_input_lock)) + return 0; + hid_dump_device(hdev, f); seq_printf(f, "\n"); hid_dump_input_mapping(hdev, f); + up(&hdev->driver_input_lock); + return 0; } -- GitLab From 9b70c697e87286ade406e6a02091757307dd4b7c Mon Sep 17 00:00:00 2001 From: Maxime Jourdan Date: Tue, 19 Mar 2019 11:25:37 +0100 Subject: [PATCH 0090/1861] clk: meson-gxbb: round the vdec dividers to closest We want the video decoder clocks to always round to closest. While the muxes are already using CLK_MUX_ROUND_CLOSEST, the corresponding CLK_DIVIDER_ROUND_CLOSEST was forgotten for the dividers. Fix this by adding the flag to the two vdec dividers. Fixes: a565242eb9fc ("clk: meson: gxbb: add the video decoder clocks") Signed-off-by: Maxime Jourdan Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190319102537.2043-1-mjourdan@baylibre.com --- drivers/clk/meson/gxbb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 04df2e208ed6e..29ffb4fde7145 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -2216,6 +2216,7 @@ static struct clk_regmap gxbb_vdec_1_div = { .offset = HHI_VDEC_CLK_CNTL, .shift = 0, .width = 7, + .flags = CLK_DIVIDER_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "vdec_1_div", @@ -2261,6 +2262,7 @@ static struct clk_regmap gxbb_vdec_hevc_div = { .offset = HHI_VDEC2_CLK_CNTL, .shift = 16, .width = 7, + .flags = CLK_DIVIDER_ROUND_CLOSEST, }, .hw.init = &(struct clk_init_data){ .name = "vdec_hevc_div", -- GitLab From f53b9f146fa1d5c5bb6dc34e27176434b26cd0a7 Mon Sep 17 00:00:00 2001 From: Maxime Jourdan Date: Tue, 19 Mar 2019 09:26:11 +0100 Subject: [PATCH 0091/1861] clk: meson: g12a: fix VPU clock muxes mask There are 8 parents, use 0x7 Fixes: 085a4ea93d54 ("clk: meson: g12a: add peripheral clock controller") Signed-off-by: Maxime Jourdan Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190319082611.6215-1-mjourdan@baylibre.com --- drivers/clk/meson/g12a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 0e1ce8c03259b..683769f6e90d6 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -967,7 +967,7 @@ static const char * const g12a_vpu_parent_names[] = { static struct clk_regmap g12a_vpu_0_sel = { .data = &(struct clk_regmap_mux_data){ .offset = HHI_VPU_CLK_CNTL, - .mask = 0x3, + .mask = 0x7, .shift = 9, }, .hw.init = &(struct clk_init_data){ @@ -1011,7 +1011,7 @@ static struct clk_regmap g12a_vpu_0 = { static struct clk_regmap g12a_vpu_1_sel = { .data = &(struct clk_regmap_mux_data){ .offset = HHI_VPU_CLK_CNTL, - .mask = 0x3, + .mask = 0x7, .shift = 25, }, .hw.init = &(struct clk_init_data){ -- GitLab From 9e05e49c29fde3f5e0d82542cb89e26c0bc828d0 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 13 Mar 2019 14:55:03 +0100 Subject: [PATCH 0092/1861] clk: meson-g12a: fix VPU clock parents First two VPU clock 
parents are wrong, fix it here. Fixes: 085a4ea93d54 ("clk: meson: g12a: add peripheral clock controller") Signed-off-by: Neil Armstrong Acked-by: Jerome Brunet Link: https://lkml.kernel.org/r/20190313135503.3198-1-narmstrong@baylibre.com --- drivers/clk/meson/g12a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c index 683769f6e90d6..f7b11e1eeebe8 100644 --- a/drivers/clk/meson/g12a.c +++ b/drivers/clk/meson/g12a.c @@ -960,7 +960,7 @@ static struct clk_regmap g12a_sd_emmc_c_clk0 = { /* VPU Clock */ static const char * const g12a_vpu_parent_names[] = { - "fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7", + "fclk_div3", "fclk_div4", "fclk_div5", "fclk_div7", "mpll1", "vid_pll", "hifi_pll", "gp0_pll", }; -- GitLab From 0fcc4c8c044e117ac126ab6df4138ea9a67fa2a9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 19 Mar 2019 02:36:59 +0100 Subject: [PATCH 0093/1861] device_cgroup: fix RCU imbalance in error case When dev_exception_add() returns an error (due to a failed memory allocation), make sure that we move the RCU preemption count back to where it was before we were called. We dropped the RCU read lock inside the loop body, so we can't just "break". sparse complains about this, too: $ make -s C=2 security/device_cgroup.o ./include/linux/rcupdate.h:647:9: warning: context imbalance in 'propagate_exception' - unexpected unlock Fixes: d591fb56618f ("device_cgroup: simplify cgroup tree walk in propagate_exception()") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Acked-by: Michal Hocko Signed-off-by: Tejun Heo --- security/device_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index cd97929fac663..dc28914fa72e0 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -560,7 +560,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, devcg->behavior == DEVCG_DEFAULT_ALLOW) { rc = dev_exception_add(devcg, ex); if (rc) - break; + return rc; } else { /* * in the other possible cases: -- GitLab From d6752e185c3168771787a02dc6a55f32260943cc Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 15 Mar 2019 11:51:12 -0700 Subject: [PATCH 0094/1861] rtc: cros-ec: Fail suspend/resume if wake IRQ can't be configured If we encounter a failure during suspend where this RTC was programmed to wakeup the system from suspend, but that wakeup couldn't be configured because the system didn't support wakeup interrupts, we'll run into the following warning: Unbalanced IRQ 166 wake disable WARNING: CPU: 7 PID: 3071 at kernel/irq/manage.c:669 irq_set_irq_wake+0x108/0x278 This happens because the suspend process isn't aborted when the RTC fails to configure the wakeup IRQ. Instead, we continue suspending the system and then another suspend callback fails the suspend process and "unwinds" the previously suspended drivers by calling their resume callbacks. When we get back to resuming this RTC driver, we'll call disable_irq_wake() on an IRQ that hasn't been configured for wake. Let's just fail suspend/resume here if we can't configure the system to wake and the user has chosen to wakeup with this device. This fixes this warning and makes the code more robust in case there are systems out there that can't wakeup from suspend on this line but the user has chosen to do so. 
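For illustration, the shape of the fix is the standard wake-IRQ pattern sketched below (a minimal sketch mirroring the suspend side of the diff that follows; the resume side is symmetric with disable_irq_wake()):

  static int cros_ec_rtc_suspend(struct device *dev)
  {
  	struct platform_device *pdev = to_platform_device(dev);
  	struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev);

  	/* A non-zero return here aborts the suspend sequence, so the
  	 * PM core never unwinds through our resume callback with a
  	 * wake IRQ that was never armed.
  	 */
  	if (device_may_wakeup(dev))
  		return enable_irq_wake(cros_ec_rtc->cros_ec->irq);

  	return 0;
  }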
Cc: Enric Balletbo i Serra Cc: Evan Green Cc: Benson Leung Cc: Guenter Roeck Signed-off-by: Stephen Boyd Acked-By: Benson Leung Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cros-ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index e5444296075ee..4d6bf9304ceb3 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -298,7 +298,7 @@ static int cros_ec_rtc_suspend(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - enable_irq_wake(cros_ec_rtc->cros_ec->irq); + return enable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } @@ -309,7 +309,7 @@ static int cros_ec_rtc_resume(struct device *dev) struct cros_ec_rtc *cros_ec_rtc = dev_get_drvdata(&pdev->dev); if (device_may_wakeup(dev)) - disable_irq_wake(cros_ec_rtc->cros_ec->irq); + return disable_irq_wake(cros_ec_rtc->cros_ec->irq); return 0; } -- GitLab From 15d82d22498784966df8e4696174a16b02cc1052 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Mar 2019 11:32:14 +0100 Subject: [PATCH 0095/1861] rtc: sh: Fix invalid alarm warning for non-enabled alarm When no alarm has been programmed on RSK-RZA1, an error message is printed during boot: rtc rtc0: invalid alarm value: 2019-03-14T255:255:255 sh_rtc_read_alarm_value() returns 0xff when querying a hardware alarm field that is not enabled. __rtc_read_alarm() validates the received alarm values, and fills in missing fields when needed. While 0xff is handled fine for the year, month, and day fields, and corrected as considered being out-of-range, this is not the case for the hour, minute, and second fields, where -1 is expected for missing fields. Fix this by returning -1 instead, as this value is handled fine for all fields. Signed-off-by: Geert Uytterhoeven Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index d417b203cbc55..1d3de2a3d1a4d 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -374,7 +374,7 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm) static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off) { unsigned int byte; - int value = 0xff; /* return 0xff for ignored values */ + int value = -1; /* return -1 for ignored values */ byte = readb(rtc->regbase + reg_off); if (byte & AR_ENB) { -- GitLab From 22e7d5148d9a0b2e5bd20913f55cce89b58cd990 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 20 Mar 2019 13:10:23 +0100 Subject: [PATCH 0096/1861] rtc: sd3078: fix manufacturer name The proper manufacturer name is Shenzhen whwave. Signed-off-by: Alexandre Belloni --- drivers/rtc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a71734c416939..f933c06bff4f8 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -667,9 +667,9 @@ config RTC_DRV_S5M will be called rtc-s5m. config RTC_DRV_SD3078 - tristate "ZXW Crystal SD3078" + tristate "ZXW Shenzhen whwave SD3078" help - If you say yes here you get support for the ZXW Crystal + If you say yes here you get support for the ZXW Shenzhen whwave SD3078 RTC chips. This driver can also be built as a module. 
If so, the module -- GitLab From fd35759ce32b60d3eb52436894bab996dbf8cffa Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Wed, 20 Mar 2019 08:48:23 +1000 Subject: [PATCH 0097/1861] HID: logitech: Handle 0 scroll events for the m560 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hidpp_scroll_counter_handle_scroll() doesn't expect a 0-value scroll event, it gets interpreted as a negative scroll direction event. This can cause scroll direction resets and thus broken scrolling. Fixes: 4435ff2f09a2fc ("HID: logitech: Enable high-resolution scrolling on Logitech mice") Cc: stable@vger.kernel.org # v5.0 Reported-and-tested-by: Aimo Metsälä Signed-off-by: Peter Hutterer Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0a243247b231a..199cc256e9d9d 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -2614,8 +2614,9 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size) input_report_rel(mydata->input, REL_Y, v); v = hid_snto32(data[6], 8); - hidpp_scroll_counter_handle_scroll( - &hidpp->vertical_wheel_counter, v); + if (v != 0) + hidpp_scroll_counter_handle_scroll( + &hidpp->vertical_wheel_counter, v); input_sync(mydata->input); } -- GitLab From bfc01ddff2b0c33de21af436324a669e95ac7e78 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 20 Mar 2019 17:54:44 +0100 Subject: [PATCH 0098/1861] Revert "net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm" This reverts commit f10e0010fae8174dc20bdc872bcaa85baa925cb7. This commit was just wrong. It caused a lot of syzbot warnings, so just revert it. Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 30 +++++++----------------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index d2a36fb9f92a4..59f45b1e9dac0 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -57,7 +57,7 @@ struct netns_xfrm { struct list_head inexact_bins; - struct sock __rcu *nlsk; + struct sock *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 9445898323438..8d4d52fd457b2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1071,22 +1071,6 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } -/* A similar wrapper like xfrm_nlmsg_multicast checking that nlsk is still - * available. 
- */ -static inline int xfrm_nlmsg_unicast(struct net *net, struct sk_buff *skb, - u32 pid) -{ - struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); - - if (!nlsk) { - kfree_skb(skb); - return -EPIPE; - } - - return nlmsg_unicast(nlsk, skb, pid); -} - static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -1211,7 +1195,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return xfrm_nlmsg_unicast(net, r_skb, sportid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) @@ -1270,7 +1254,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); - return xfrm_nlmsg_unicast(net, r_skb, sportid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1290,7 +1274,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1353,7 +1337,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1919,8 +1903,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = xfrm_nlmsg_unicast(net, resp_skb, - NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, + NETLINK_CB(skb).portid); } } else { xfrm_audit_policy_delete(xp, err ? 0 : 1, true); @@ -2078,7 +2062,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); - err = xfrm_nlmsg_unicast(net, r_skb, NETLINK_CB(skb).portid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; -- GitLab From 19441e35a43b616ea6afad91ed0d9e77268d8f6a Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Mar 2019 15:52:43 +0100 Subject: [PATCH 0099/1861] ASoC: stm32: dfsdm: manage multiple prepare The DFSDM must be stopped when a new setting is applied. restart systematically DFSDM on multiple prepare calls, to apply changes. 
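In outline, the prepare callback now follows the usual stop-reconfigure-restart sequence. A simplified sketch (locking and error paths appear in full in the diff below; iio_channel_start_all_cb() on the restart side is an assumption based on the driver's existing start path):

  /* Stop a running transfer first, so the new rate takes effect on restart. */
  if (priv->iio_active) {
  	iio_channel_stop_all_cb(priv->iio_cb);
  	priv->iio_active = false;
  }

  ret = iio_write_channel_attribute(priv->iio_ch, rate, 0,
  				    IIO_CHAN_INFO_SAMP_FREQ);
  if (ret < 0)
  	goto out;

  /* Restart with the new setting. */
  ret = iio_channel_start_all_cb(priv->iio_cb);
  if (!ret)
  	priv->iio_active = true;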
Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_adfsdm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 47901983a6ff8..d77f0421e4f01 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -37,6 +38,8 @@ struct stm32_adfsdm_priv { /* PCM buffer */ unsigned char *pcm_buff; unsigned int pos; + + struct mutex lock; /* protect against race condition on iio state */ }; static const struct snd_pcm_hardware stm32_adfsdm_pcm_hw = { @@ -62,10 +65,12 @@ static void stm32_adfsdm_shutdown(struct snd_pcm_substream *substream, { struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); + mutex_lock(&priv->lock); if (priv->iio_active) { iio_channel_stop_all_cb(priv->iio_cb); priv->iio_active = false; } + mutex_unlock(&priv->lock); } static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, @@ -74,13 +79,19 @@ static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); int ret; + mutex_lock(&priv->lock); + if (priv->iio_active) { + iio_channel_stop_all_cb(priv->iio_cb); + priv->iio_active = false; + } + ret = iio_write_channel_attribute(priv->iio_ch, substream->runtime->rate, 0, IIO_CHAN_INFO_SAMP_FREQ); if (ret < 0) { dev_err(dai->dev, "%s: Failed to set %d sampling rate\n", __func__, substream->runtime->rate); - return ret; + goto out; } if (!priv->iio_active) { @@ -92,6 +103,9 @@ static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, __func__, ret); } +out: + mutex_unlock(&priv->lock); + return ret; } @@ -299,6 +313,7 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) priv->dev = &pdev->dev; priv->dai_drv = stm32_adfsdm_dai; + mutex_init(&priv->lock); dev_set_drvdata(&pdev->dev, priv); -- GitLab From c47255b61129857b74b0d86eaf59335348be05e0 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Mar 2019 15:52:44 +0100 Subject: [PATCH 0100/1861] ASoC: stm32: dfsdm: fix debugfs warnings on entry creation Register platform component with a prefix, to avoid warnings on debugfs entries creation, due to component name redundancy. 
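The underlying problem: the driver registers two components for the same device, so both debugfs directories would be created under the same name. Giving the PCM component its own prefix keeps the entries distinct; condensed from the diff below:

  component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL);
  #ifdef CONFIG_DEBUG_FS
  	component->debugfs_prefix = "pcm";	/* distinct debugfs directory name */
  #endif
  	ret = snd_soc_add_component(&pdev->dev, component,
  				    &stm32_adfsdm_soc_platform, NULL, 0);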
Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_adfsdm.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index d77f0421e4f01..78bed97347136 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -305,6 +305,7 @@ MODULE_DEVICE_TABLE(of, stm32_adfsdm_of_match); static int stm32_adfsdm_probe(struct platform_device *pdev) { struct stm32_adfsdm_priv *priv; + struct snd_soc_component *component; int ret; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -332,9 +333,15 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) if (IS_ERR(priv->iio_cb)) return PTR_ERR(priv->iio_cb); - ret = devm_snd_soc_register_component(&pdev->dev, - &stm32_adfsdm_soc_platform, - NULL, 0); + component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL); + if (!component) + return -ENOMEM; +#ifdef CONFIG_DEBUG_FS + component->debugfs_prefix = "pcm"; +#endif + + ret = snd_soc_add_component(&pdev->dev, component, + &stm32_adfsdm_soc_platform, NULL, 0); if (ret < 0) dev_err(&pdev->dev, "%s: Failed to register PCM platform\n", __func__); @@ -342,12 +349,20 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) return ret; } +static int stm32_adfsdm_remove(struct platform_device *pdev) +{ + snd_soc_unregister_component(&pdev->dev); + + return 0; +} + static struct platform_driver stm32_adfsdm_driver = { .driver = { .name = STM32_ADFSDM_DRV_NAME, .of_match_table = stm32_adfsdm_of_match, }, .probe = stm32_adfsdm_probe, + .remove = stm32_adfsdm_remove, }; module_platform_driver(stm32_adfsdm_driver); -- GitLab From df2f677dee3ce4afda7ee561dd4f321c40320afd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 15 Feb 2019 21:58:23 -0800 Subject: [PATCH 0101/1861] tools/power turbostat: Restore ability to execute in topology-order turbostat executes on CPUs in "topology order". This is an optimization for measuring profoundly idle systems -- as the closest hardware is woken next... Fix a typo that was added with the sub-die-node support, that broke topology ordering on multi-node systems. 
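The typo was an inverted loop nesting: cores were iterated above nodes, so CPUs were visited out of topology order on multi-node parts. The corrected traversal, with the per-thread body elided, is:

  for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no)
  	for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++)
  		for (core_no = 0; core_no < topo.cores_per_node; ++core_no)
  			for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no)
  				/* func() runs here for each matching thread */;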
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9327c0ddc3a59..78d88b7d4e980 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -314,9 +314,8 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (node_no = 0; node_no < topo.nodes_per_pkg; - node_no++) { + for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; -- GitLab From 562855eeb1136009bc9d597116ac5829bf82dd06 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 17:52:32 -0400 Subject: [PATCH 0102/1861] tools/power turbostat: Cleanup CC3-skip code no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 78d88b7d4e980..861cb795679b7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -666,7 +666,7 @@ void print_header(char *delim) if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); @@ -1002,7 +1002,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) + if (DO_BIC(BIC_CPU_c3)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); @@ -1817,7 +1817,7 @@ retry: if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) { + if (DO_BIC(BIC_CPU_c3)) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } @@ -4703,6 +4703,9 @@ void process_cpuid() do_knl_cstates = is_knl(family, model); do_cnl_cstates = is_cnl(family, model); + if (do_slm_cstates || do_knl_cstates || do_cnl_cstates) + BIC_NOT_PRESENT(BIC_CPU_c3); + if (!quiet) decode_misc_pwr_mgmt_msr(); -- GitLab From 31a1f15cea5e56fe8151ddf01278cb60c325626b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 17:55:06 -0400 Subject: [PATCH 0103/1861] tools/power turbostat: Cleanup CNL-specific code no functional change. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 861cb795679b7..f2384ee98cdc8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -63,7 +63,6 @@ unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_slm_cstates; -unsigned int do_cnl_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; @@ -4701,9 +4700,8 @@ void process_cpuid() } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - do_cnl_cstates = is_cnl(family, model); - if (do_slm_cstates || do_knl_cstates || do_cnl_cstates) + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model)) BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) -- GitLab From 937807d355a375393557674e3233662a7131c46b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 19 Mar 2019 19:09:07 -0400 Subject: [PATCH 0104/1861] tools/power turbostat: Add Icelake support From a turbostat point of view, Icelake is like Cannonlake. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f2384ee98cdc8..4f81b52aef846 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4449,6 +4449,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: return INTEL_FAM6_SKYLAKE_MOBILE; + + case INTEL_FAM6_ICELAKE_MOBILE: + return INTEL_FAM6_CANNONLAKE_MOBILE; } return model; } -- GitLab From 6de68fe15a0fcd0e887d73bd7a549e4dc6446481 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 14 Feb 2019 19:17:40 -0800 Subject: [PATCH 0105/1861] tools/power turbostat: Add Die column If the system has more than one software visible die per package, print a Die column. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 +++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4f81b52aef846..7d6c14ecf35a8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -272,6 +272,7 @@ struct system_summary { struct cpu_topology { int physical_package_id; + int die_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -282,6 +283,7 @@ struct cpu_topology { struct topo_params { int num_packages; + int num_die; int num_cpus; int num_cores; int max_cpu_num; @@ -440,6 +442,7 @@ struct msr_counter bic[] = { { 0x0, "CPU" }, { 0x0, "APIC" }, { 0x0, "X2APIC" }, + { 0x0, "Die" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -493,6 +496,7 @@ struct msr_counter bic[] = { #define BIC_CPU (1ULL << 47) #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -619,6 +623,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%sDie", (printed++ ? 
delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -902,6 +908,8 @@ int format_counters(struct thread_data *t, struct core_data *c, if (t == &average.threads) { if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -919,6 +927,12 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Die)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", @@ -2454,6 +2468,8 @@ void free_all_buffers(void) /* * Parse a file containing a single int. + * Return 0 if file can not be opened + * Exit if file can be opened, but can not be parsed */ int parse_int_file(const char *fmt, ...) { @@ -2465,7 +2481,9 @@ int parse_int_file(const char *fmt, ...) va_start(args, fmt); vsnprintf(path, sizeof(path), fmt, args); va_end(args); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + if (!filep) + return 0; if (fscanf(filep, "%d", &value) != 1) err(1, "%s: failed to parse number from file", path); fclose(filep); @@ -2486,6 +2504,11 @@ int get_physical_package_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); } +int get_die_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -4772,6 +4795,7 @@ void topology_probe() int i; int max_core_id = 0; int max_package_id = 0; + int max_die_id = 0; int max_siblings = 0; /* Initialize num_cpus, max_cpu_num */ @@ -4838,6 +4862,11 @@ void topology_probe() if (cpus[i].physical_package_id > max_package_id) max_package_id = cpus[i].physical_package_id; + /* get die information */ + cpus[i].die_id = get_die_id(i); + if (cpus[i].die_id > max_die_id) + max_die_id = cpus[i].die_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -4863,6 +4892,13 @@ void topology_probe() if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); + topo.num_die = max_die_id + 1; + if (debug > 1) + fprintf(outf, "max_die_id %d, sizing for %d die\n", + max_die_id, topo.num_die); + if (!summary_only && topo.num_die > 1) + BIC_PRESENT(BIC_Die); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", @@ -4887,8 +4923,8 @@ void topology_probe() if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, + "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, -- GitLab From 0a42d235e50d677775056324d0762dd102a9ebb0 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 13 Aug 2018 08:45:01 -0400 Subject: [PATCH 0106/1861] tools/power turbostat: Do not display an error on systems without a cpufreq driver Running without a cpufreq driver is a valid case so warnings output in this case 
should not be to stderr. Use outf instead of stderr for these warnings. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7d6c14ecf35a8..0716abdb1bd9b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3465,7 +3465,7 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } fgets(driver_buf, sizeof(driver_buf), input); @@ -3475,7 +3475,7 @@ dump_sysfs_pstate_config(void) base_cpu); input = fopen(path, "r"); if (input == NULL) { - fprintf(stderr, "NSFOD %s\n", path); + fprintf(outf, "NSFOD %s\n", path); return; } fgets(governor_buf, sizeof(governor_buf), input); -- GitLab From 9392bd98bba760be96ee67f51cb040dcf7aa190e Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 17 Aug 2018 12:34:41 -0400 Subject: [PATCH 0107/1861] tools/power turbostat: Add support for AMD Fam 17h (Zen) RAPL Based on the Open-Source Register Reference for AMD Family 17h Processors Models 00h-2Fh: https://support.amd.com/TechDocs/56255_OSRR.pdf These processors report RAPL support in bit 14 of CPUID 0x80000007 EDX, and the following MSRs are present: 0xc0010299 (RAPL_PWR_UNIT), like Intel's RAPL_POWER_UNIT 0xc001029a (CORE_ENERGY_STAT), kind of like Intel's PP0_ENERGY_STATUS 0xc001029b (PKG_ENERGY_STAT), like Intel's PKG_ENERGY_STATUS A notable difference from the Intel implementation is that AMD reports the "Cores" energy usage separately for each core, rather than a per-package total. The code has been adjusted to handle either case in a generic way. I haven't yet enabled collection of package power, due to being unable to test it on multi-node systems (TR, EPYC). Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 159 +++++++++++++++++++++----- 1 file changed, 130 insertions(+), 29 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0716abdb1bd9b..538878b9deb82 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -44,6 +44,7 @@ #include #include #include +#include char *proc_stat = "/proc/stat"; FILE *outf; @@ -140,9 +141,21 @@ unsigned int first_counter_read = 1; #define RAPL_CORES_ENERGY_STATUS (1 << 9) /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_PER_CORE_ENERGY (1 << 10) + /* Indicates cores energy collection is per-core, + * not per-package. */ +#define RAPL_AMD_F17H (1 << 11) + /* 0xc0010299 MSR_RAPL_PWR_UNIT */ + /* 0xc001029a MSR_CORE_ENERGY_STAT */ + /* 0xc001029b MSR_PKG_ENERGY_STAT */ #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) #define TJMAX_DEFAULT 100 +/* MSRs that are not yet in the kernel-provided header. */ +#define MSR_RAPL_PWR_UNIT 0xc0010299 +#define MSR_CORE_ENERGY_STAT 0xc001029a +#define MSR_PKG_ENERGY_STAT 0xc001029b + #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) /* @@ -186,6 +199,7 @@ struct core_data { unsigned long long c7; unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; + unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; @@ -684,6 +698,14 @@ void print_header(char *delim) if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + if (do_rapl && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + } else if (do_rapl && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); + } + for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) @@ -738,7 +760,7 @@ void print_header(char *delim) if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); @@ -751,7 +773,7 @@ void print_header(char *delim) } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); @@ -812,6 +834,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c6: %016llX\n", c->c6); outp += sprintf(outp, "c7: %016llX\n", c->c7); outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", @@ -1045,6 +1068,20 @@ int format_counters(struct thread_data *t, struct core_data *c, } } + /* + * If measurement interval exceeds minimum RAPL Joule Counter range, + * indicate that results are suspect by printing "**" in fraction place. + */ + if (interval_float < rapl_joule_counter_range) + fmt8 = "%s%.2f"; + else + fmt8 = "%6.0f**"; + + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); + /* print per-package data only for 1st core in package */ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; @@ -1097,18 +1134,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_SYS_LPI)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; - if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), p->energy_pkg * rapl_energy_units / interval_float); - if (DO_BIC(BIC_CorWatt)) + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); @@ -1116,7 +1144,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); - if (DO_BIC(BIC_Cor_J)) + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); @@ -1261,6 +1289,8 @@ delta_core(struct core_data *new, struct core_data *old) old->core_temp_c = new->core_temp_c; old->mc6_us = new->mc6_us - old->mc6_us; + DELTA_WRAP32(new->core_energy, old->core_energy); + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) old->counter[i] = new->counter[i]; @@ -1403,6 +1433,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->mc6_us = 0; c->core_temp_c = 0; + c->core_energy = 0; p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; @@ -1485,6 +1516,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_energy += c->core_energy; + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -1857,6 +1890,12 @@ retry: c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) + return -14; + c->core_energy = msr & 0xFFFFFFFF; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &c->counter[i])) return -10; @@ -3739,7 +3778,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(unsigned int model) +double get_tdp_intel(unsigned int model) { unsigned long long msr; @@ -3756,6 +3795,16 @@ double get_tdp(unsigned int model) } } +double get_tdp_amd(unsigned int family) +{ + switch (family) { + case 0x17: + default: + /* This is the max stock TDP of HEDT/Server Fam17h chips */ + return 250.0; + } +} + /* * rapl_dram_energy_units_probe() * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. 
@@ -3775,21 +3824,12 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) } } - -/* - * rapl_probe() - * - * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units - */ -void rapl_probe(unsigned int family, unsigned int model) +void rapl_probe_intel(unsigned int family, unsigned int model) { unsigned long long msr; unsigned int time_unit; double tdp; - if (!genuine_intel) - return; - if (family != 6) return; @@ -3913,13 +3953,66 @@ void rapl_probe(unsigned int family, unsigned int model) rapl_time_units = 1.0 / (1 << (time_unit)); - tdp = get_tdp(model); + tdp = get_tdp_intel(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} - return; +void rapl_probe_amd(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int eax, ebx, ecx, edx; + unsigned int has_rapl = 0; + double tdp; + + if (max_extended_level >= 0x80000007) { + __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h) */ + has_rapl = edx & (1 << 14); + } + + if (!has_rapl) + return; + + switch (family) { + case 0x17: /* Zen, Zen+ */ + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) + BIC_PRESENT(BIC_Cor_J); + else + BIC_PRESENT(BIC_CorWatt); + break; + default: + return; + } + + if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) + return; + + rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); + rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); + rapl_power_units = ldexp(1.0, -(msr & 0xf)); + + tdp = get_tdp_amd(model); + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + if (!quiet) + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} + +/* + * rapl_probe() + * + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + if (genuine_intel) + rapl_probe_intel(family, model); + if (authentic_amd) + rapl_probe_amd(family, model); } void perf_limit_reasons_probe(unsigned int family, unsigned int model) @@ -4024,6 +4117,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; + const char *msr_name; int cpu; if (!do_rapl) @@ -4039,10 +4133,17 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } - if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) - return -1; + if (do_rapl & RAPL_AMD_F17H) { + msr_name = "MSR_RAPL_PWR_UNIT"; + if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) + return -1; + } else { + msr_name = "MSR_RAPL_POWER_UNIT"; + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + } - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); if (do_rapl & RAPL_PKG_POWER_INFO) { -- GitLab From 3316f99a9f1b68c578c57e76792bd19da1c7d423 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Fri, 17 Aug 2018 12:34:42 -0400 Subject: [PATCH 0108/1861] tools/power turbostat: Also read package power on AMD F17h (Zen) The package power can also be read from an MSR. It's not clear exactly what is included, and whether it's aggregated over all nodes or reported separately. 
It does look like this is reported separately per CCX (I get a single value on the Ryzen R7 1700), but it might be reported separately per- die (node?) on larger processors. If that's the case, it would have to be recorded per node and aggregated for the socket. Note that although Zen has these MSRs reporting power, it looks like the actual RAPL configuration (power limits, configured TDP) is done through PCI configuration space. I have not yet found any public documentation for this. Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 538878b9deb82..ee9aaeef662e7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1985,6 +1985,11 @@ retry: return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + return -13; + p->energy_pkg = msr & 0xFFFFFFFF; + } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; @@ -3979,10 +3984,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model) switch (family) { case 0x17: /* Zen, Zen+ */ do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); BIC_PRESENT(BIC_Cor_J); - else + } else { + BIC_PRESENT(BIC_PkgWatt); BIC_PRESENT(BIC_CorWatt); + } break; default: return; -- GitLab From 8173c336989c1a12290cd023969df2775b2df34d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 20 Mar 2019 23:01:03 -0400 Subject: [PATCH 0109/1861] tools/power turbostat: Add checks for failure of fgets() and fscanf() Most calls to fgets() and fscanf() are followed by error checks. Add an exit-on-error in the remaining cases. 
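The pattern applied throughout is the same two-line check; any short read or failed parse now terminates via err(3). Representative form, taken from the hunks below:

  if (!fgets(name_buf, sizeof(name_buf), input))
  	err(1, "%s: failed to read file", path);

  if (fscanf(input, "%d", &turbo) != 1)
  	err(1, "%s: failed to parse number from file", path);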
Signed-off-by: Ben Hutchings Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ee9aaeef662e7..9817abd215b9f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2643,7 +2643,8 @@ int get_thread_siblings(struct cpu_topology *thiscpu) filep = fopen_or_die(path, "r"); do { offset -= BITMASK_SIZE; - fscanf(filep, "%lx%c", &map, &character); + if (fscanf(filep, "%lx%c", &map, &character) != 2) + err(1, "%s: failed to parse file", path); for (shift = 0; shift < BITMASK_SIZE; shift++) { if ((map >> shift) & 0x1) { so = shift + offset; @@ -3475,14 +3476,14 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); *sp = '\0'; - fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", @@ -3490,7 +3491,8 @@ dump_sysfs_cstate_config(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(desc, sizeof(desc), input); + if (!fgets(desc, sizeof(desc), input)) + err(1, "%s: failed to read file", path); fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); fclose(input); @@ -3512,7 +3514,8 @@ dump_sysfs_pstate_config(void) fprintf(outf, "NSFOD %s\n", path); return; } - fgets(driver_buf, sizeof(driver_buf), input); + if (!fgets(driver_buf, sizeof(driver_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", @@ -3522,7 +3525,8 @@ dump_sysfs_pstate_config(void) fprintf(outf, "NSFOD %s\n", path); return; } - fgets(governor_buf, sizeof(governor_buf), input); + if (!fgets(governor_buf, sizeof(governor_buf), input)) + err(1, "%s: failed to read file", path); fclose(input); fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); @@ -3531,7 +3535,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq boost: %d\n", turbo); fclose(input); } @@ -3539,7 +3544,8 @@ dump_sysfs_pstate_config(void) sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); input = fopen(path, "r"); if (input != NULL) { - fscanf(input, "%d", &turbo); + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); fclose(input); } @@ -5464,7 +5470,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); @@ -5491,7 +5498,8 @@ void probe_sysfs(void) input = fopen(path, "r"); if (input == NULL) continue; - fgets(name_buf, sizeof(name_buf), input); + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = 
strchr(name_buf, '-'); if (!sp) -- GitLab From 5ea7647b333f3580697edaaf2b17a2f6d29a82f1 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 25 Sep 2018 08:59:26 -0400 Subject: [PATCH 0110/1861] tools/power turbostat: Warn on bad ACPI LPIT data On some systems /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us or /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us return a file error because of bad ACPI LPIT data from a misconfigured BIOS. turbostat interprets this failure as a fatal error and outputs turbostat: CPU LPI: No data available If the ACPI LPIT sysfs files return an error output a warning instead of a fatal error, disable the ACPI LPIT evaluation code, and continue. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9817abd215b9f..442af78924021 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2921,8 +2921,11 @@ int snapshot_cpu_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); - if (retval != 1) - err(1, "CPU LPI"); + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle CPU output\n"); + BIC_NOT_PRESENT(BIC_CPU_LPI); + return -1; + } fclose(fp); @@ -2944,9 +2947,11 @@ int snapshot_sys_lpi_us(void) fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); - if (retval != 1) - err(1, "SYS LPI"); - + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle System output\n"); + BIC_NOT_PRESENT(BIC_SYS_LPI); + return -1; + } fclose(fp); return 0; -- GitLab From 0f71d089c912769251c992b8f7dcd508a472fe10 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 20 Mar 2019 23:23:25 -0400 Subject: [PATCH 0111/1861] tools/power turbostat: update version number Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 442af78924021..8d176b10daecc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5278,7 +5278,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.07.27" + fprintf(outf, "turbostat version 19.03.20" " - Len Brown \n"); } -- GitLab From 5483844c3fc18474de29f5d6733003526e0a9f78 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 19 Mar 2019 15:39:20 +0000 Subject: [PATCH 0112/1861] vti4: ipip tunnel deregistration fixes. If tunnel registration failed during module initialization, the module would fail to deregister the IPPROTO_COMP protocol and would attempt to deregister the tunnel. The tunnel was not deregistered during module-exit. 
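The rule being restored: an error-unwind ladder must tear down in the reverse of registration order, and module-exit must mirror module-init. An abridged sketch of the corrected shape (labels and calls taken from the diff below, other registration steps elided):

  err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
  if (err < 0)
  	goto xfrm_tunnel_failed;
  err = rtnl_link_register(&vti_link_ops);
  if (err < 0)
  	goto rtnl_link_failed;
  return 0;

  rtnl_link_failed:
  	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);	/* undo the last success first */
  xfrm_tunnel_failed:
  	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
  	/* ...and vti_fini() now also deregisters the ipip tunnel. */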
Fixes: dd9ee3444014e ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel") Signed-off-by: Jeremy Sowden Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68a21bf75dd0b..b6235ca09fa53 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -659,9 +659,9 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); -xfrm_tunnel_failed: xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -676,6 +676,7 @@ pernet_dev_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); -- GitLab From 01ce31c57b3f07c91c9d45bbaf126124cce83a5d Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 19 Mar 2019 15:39:21 +0000 Subject: [PATCH 0113/1861] vti4: removed duplicate log message. Removed info log-message if ipip tunnel registration fails during module-initialization: it adds nothing to the error message that is written on all failures. Fixes: dd9ee3444014e ("vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel") Signed-off-by: Jeremy Sowden Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index b6235ca09fa53..35d8346742e2c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -646,10 +646,8 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto xfrm_tunnel_failed; - } msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); -- GitLab From f0f2338a9cfaf71db895fa989ea7234e8a9b471d Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 20 Mar 2019 22:41:56 +0100 Subject: [PATCH 0114/1861] ASoC: cs4270: Set auto-increment bit for register writes The CS4270 does not by default increment the register address on consecutive writes. During normal operation it doesn't matter as all register accesses are done individually. At resume time after suspend, however, the regcache code gathers the biggest possible block of registers to sync and sends them in one go. To fix this, set the INCR bit in all cases. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- sound/soc/codecs/cs4270.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 33d74f163bd75..793a14d586672 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -642,6 +642,7 @@ static const struct regmap_config cs4270_regmap = { .reg_defaults = cs4270_reg_defaults, .num_reg_defaults = ARRAY_SIZE(cs4270_reg_defaults), .cache_type = REGCACHE_RBTREE, + .write_flag_mask = CS4270_I2C_INCR, .readable_reg = cs4270_reg_is_readable, .volatile_reg = cs4270_reg_is_volatile, -- GitLab From 53f67a78663811968f426d480bc55887d787bd94 Mon Sep 17 00:00:00 2001 From: "S.j. 
Wang" Date: Sat, 2 Mar 2019 05:52:19 +0000 Subject: [PATCH 0115/1861] ASoC: fsl_asrc: add constraint for the asrc of older version There is a constraint for the channel number setting on the asrc of older version (e.g. imx35), the channel number should be even, odd number isn't valid. So add this constraint when the asrc of older version is used. Acked-by: Nicolin Chen Signed-off-by: Shengjiu Wang Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 528e8b1084229..0b937924d2e47 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -445,6 +445,19 @@ struct dma_chan *fsl_asrc_get_dma_channel(struct fsl_asrc_pair *pair, bool dir) } EXPORT_SYMBOL_GPL(fsl_asrc_get_dma_channel); +static int fsl_asrc_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct fsl_asrc *asrc_priv = snd_soc_dai_get_drvdata(dai); + + /* Odd channel number is not valid for older ASRC (channel_bits==3) */ + if (asrc_priv->channel_bits == 3) + snd_pcm_hw_constraint_step(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_CHANNELS, 2); + + return 0; +} + static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -539,6 +552,7 @@ static int fsl_asrc_dai_trigger(struct snd_pcm_substream *substream, int cmd, } static const struct snd_soc_dai_ops fsl_asrc_dai_ops = { + .startup = fsl_asrc_dai_startup, .hw_params = fsl_asrc_dai_hw_params, .hw_free = fsl_asrc_dai_hw_free, .trigger = fsl_asrc_dai_trigger, -- GitLab From 0ff4e8c61b794a4bf6c854ab071a1abaaa80f358 Mon Sep 17 00:00:00 2001 From: "S.j. Wang" Date: Wed, 27 Feb 2019 06:31:12 +0000 Subject: [PATCH 0116/1861] ASoC: fsl_esai: fix channel swap issue when stream starts There is very low possibility ( < 0.1% ) that channel swap happened in beginning when multi output/input pin is enabled. The issue is that hardware can't send data to correct pin in the beginning with the normal enable flow. This is hardware issue, but there is no errata, the workaround flow is that: Each time playback/recording, firstly clear the xSMA/xSMB, then enable TE/RE, then enable xSMB and xSMA (xSMB must be enabled before xSMA). Which is to use the xSMA as the trigger start register, previously the xCR_TE or xCR_RE is the bit for starting. 
Fixes commit 43d24e76b698 ("ASoC: fsl_esai: Add ESAI CPU DAI driver") Cc: Reviewed-by: Fabio Estevam Acked-by: Nicolin Chen Signed-off-by: Shengjiu Wang Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_esai.c | 47 +++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index afe67c865330e..3623aa9a6f2ea 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -54,6 +54,8 @@ struct fsl_esai { u32 fifo_depth; u32 slot_width; u32 slots; + u32 tx_mask; + u32 rx_mask; u32 hck_rate[2]; u32 sck_rate[2]; bool hck_dir[2]; @@ -361,21 +363,13 @@ static int fsl_esai_set_dai_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR, ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMA, - ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(tx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_TSMB, - ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(tx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR, ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(slots)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMA, - ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(rx_mask)); - regmap_update_bits(esai_priv->regmap, REG_ESAI_RSMB, - ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(rx_mask)); - esai_priv->slot_width = slot_width; esai_priv->slots = slots; + esai_priv->tx_mask = tx_mask; + esai_priv->rx_mask = rx_mask; return 0; } @@ -596,6 +590,7 @@ static int fsl_esai_trigger(struct snd_pcm_substream *substream, int cmd, bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; u8 i, channels = substream->runtime->channels; u32 pins = DIV_ROUND_UP(channels, esai_priv->slots); + u32 mask; switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -608,15 +603,38 @@ static int fsl_esai_trigger(struct snd_pcm_substream *substream, int cmd, for (i = 0; tx && i < channels; i++) regmap_write(esai_priv->regmap, REG_ESAI_ETDR, 0x0); + /* + * When set the TE/RE in the end of enablement flow, there + * will be channel swap issue for multi data line case. + * In order to workaround this issue, we switch the bit + * enablement sequence to below sequence + * 1) clear the xSMB & xSMA: which is done in probe and + * stop state. + * 2) set TE/RE + * 3) set xSMB + * 4) set xSMA: xSMA is the last one in this flow, which + * will trigger esai to start. + */ regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx), tx ? ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, tx ? ESAI_xCR_TE(pins) : ESAI_xCR_RE(pins)); + mask = tx ? esai_priv->tx_mask : esai_priv->rx_mask; + + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx), + ESAI_xSMB_xS_MASK, ESAI_xSMB_xS(mask)); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx), + ESAI_xSMA_xS_MASK, ESAI_xSMA_xS(mask)); + break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: regmap_update_bits(esai_priv->regmap, REG_ESAI_xCR(tx), tx ? 
ESAI_xCR_TE_MASK : ESAI_xCR_RE_MASK, 0); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMA(tx), + ESAI_xSMA_xS_MASK, 0); + regmap_update_bits(esai_priv->regmap, REG_ESAI_xSMB(tx), + ESAI_xSMB_xS_MASK, 0); /* Disable and reset FIFO */ regmap_update_bits(esai_priv->regmap, REG_ESAI_xFCR(tx), @@ -906,6 +924,15 @@ static int fsl_esai_probe(struct platform_device *pdev) return ret; } + esai_priv->tx_mask = 0xFFFFFFFF; + esai_priv->rx_mask = 0xFFFFFFFF; + + /* Clear the TSMA, TSMB, RSMA, RSMB */ + regmap_write(esai_priv->regmap, REG_ESAI_TSMA, 0); + regmap_write(esai_priv->regmap, REG_ESAI_TSMB, 0); + regmap_write(esai_priv->regmap, REG_ESAI_RSMA, 0); + regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0); + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_esai_component, &fsl_esai_dai, 1); if (ret) { -- GitLab From 8efd6365417a044db03009724ecc1a9521524913 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 13 Mar 2019 17:28:37 -0500 Subject: [PATCH 0117/1861] arm64: dts: stratix10: add the sysmgr-syscon property from the gmac's The gmac ethernet driver uses the "altr,sysmgr-syscon" property to configure phy settings for the gmac controller. Add the "altr,sysmgr-syscon" property to all gmac nodes. This patch fixes: [ 0.917530] socfpga-dwmac ff800000.ethernet: No sysmgr-syscon node found [ 0.924209] socfpga-dwmac ff800000.ethernet: Unable to parse OF data Cc: stable@vger.kernel.org Reported-by: Ley Foon Tan Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c649f6b14cb6..cd7c76e58b09a 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -162,6 +162,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 1>; + altr,sysmgr-syscon = <&sysmgr 0x44 0>; status = "disabled"; }; @@ -179,6 +180,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 2>; + altr,sysmgr-syscon = <&sysmgr 0x48 0>; status = "disabled"; }; @@ -196,6 +198,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 3>; + altr,sysmgr-syscon = <&sysmgr 0x4c 0>; status = "disabled"; }; -- GitLab From 475c6bde7228e2d624153626941f290a314e4672 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Feb 2019 13:54:17 +0100 Subject: [PATCH 0118/1861] iwlwifi: mvm: fix TX crypto on 22560+ devices In the old days, we could transmit with HW crypto with an arbitrary key by filling it into TX_CMD. This was broken first with the advent of CCMP/GCMP-256 keys which don't fit there. This was broken *again* with the newer TX_CMD format on 22560+, where we simply cannot pass key material anymore. However, we forgot to update all the cases when we get a key from mac80211 and don't program it into the hardware but still return 0 for HW crypto on TX. In AP mode with WEP, we tried to fix this by programming the keys separately for each station later, but this ultimately turns out to be buggy, for example now it leaks memory when we have more than one WEP key. Fix this by simply using only SW crypto for WEP in newer devices by returning -EOPNOTSUPP instead of trying to program WEP keys later. 
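Condensed, the WEP branch of the set_key handler now decides as follows (simplified from the mac80211.c hunk below):

  case WLAN_CIPHER_SUITE_WEP40:
  case WLAN_CIPHER_SUITE_WEP104:
  	if (vif->type == NL80211_IFTYPE_STATION)
  		break;			/* client: program the HW key as before */
  	if (iwl_mvm_has_new_tx_api(mvm))
  		return -EOPNOTSUPP;	/* 22560+: no key material in TX_CMD, use SW crypto */
  	return 0;			/* older devices: TX_CMD still carries the key on TX */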
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 64 +++++-------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 - drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 41 +----------- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 7 +- 4 files changed, 19 insertions(+), 94 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3a92c09d46926..eeaeb84756665 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2714,9 +2714,6 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw, iwl_mvm_mac_ctxt_remove(mvm, vif); - kfree(mvmvif->ap_wep_key); - mvmvif->ap_wep_key = NULL; - mutex_unlock(&mvm->mutex); } @@ -3183,24 +3180,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, ret = iwl_mvm_update_sta(mvm, vif, sta); } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { - /* if wep is used, need to set the key for the station now */ - if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { - mvm_sta->wep_key = - kmemdup(mvmvif->ap_wep_key, - sizeof(*mvmvif->ap_wep_key) + - mvmvif->ap_wep_key->keylen, - GFP_KERNEL); - if (!mvm_sta->wep_key) { - ret = -ENOMEM; - goto out_unlock; - } - - ret = iwl_mvm_set_sta_key(mvm, vif, sta, - mvm_sta->wep_key, - STA_KEY_IDX_INVALID); - } else { - ret = 0; - } + ret = 0; /* we don't support TDLS during DCM */ if (iwl_mvm_phy_ctx_count(mvm) > 1) @@ -3242,17 +3222,6 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, NL80211_TDLS_DISABLE_LINK); } - /* Remove STA key if this is an AP using WEP */ - if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) { - int rm_ret = iwl_mvm_remove_sta_key(mvm, vif, sta, - mvm_sta->wep_key); - - if (!ret) - ret = rm_ret; - kfree(mvm_sta->wep_key); - mvm_sta->wep_key = NULL; - } - if (unlikely(ret && test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status))) @@ -3439,20 +3408,12 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, break; case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - if (vif->type == NL80211_IFTYPE_AP) { - struct iwl_mvm_vif *mvmvif = - iwl_mvm_vif_from_mac80211(vif); - - mvmvif->ap_wep_key = kmemdup(key, - sizeof(*key) + key->keylen, - GFP_KERNEL); - if (!mvmvif->ap_wep_key) - return -ENOMEM; - } - - if (vif->type != NL80211_IFTYPE_STATION) - return 0; - break; + if (vif->type == NL80211_IFTYPE_STATION) + break; + if (iwl_mvm_has_new_tx_api(mvm)) + return -EOPNOTSUPP; + /* support HW crypto on TX */ + return 0; default: /* currently FW supports only one optional cipher scheme */ if (hw->n_cipher_schemes && @@ -3540,12 +3501,17 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset); if (ret) { IWL_WARN(mvm, "set key failed\n"); + key->hw_key_idx = STA_KEY_IDX_INVALID; /* * can't add key for RX, but we don't need it - * in the device for TX so still return 0 + * in the device for TX so still return 0, + * unless we have new TX API where we cannot + * put key material into the TX_CMD */ - key->hw_key_idx = STA_KEY_IDX_INVALID; - ret = 0; + if (iwl_mvm_has_new_tx_api(mvm)) + ret = -EOPNOTSUPP; + else + ret = 0; } break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index bca6f6b536d97..a50dc53df0869 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h 
@@ -498,7 +498,6 @@ struct iwl_mvm_vif { netdev_features_t features; struct iwl_probe_resp_data __rcu *probe_resp_data; - struct ieee80211_key_conf *ap_wep_key; }; static inline struct iwl_mvm_vif * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 498c315291cfa..db26f0041a81f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -2333,21 +2333,6 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg, timeout); - if (mvmvif->ap_wep_key) { - u8 key_offset = iwl_mvm_set_fw_key_idx(mvm); - - __set_bit(key_offset, mvm->fw_key_table); - - if (key_offset == STA_KEY_IDX_INVALID) - return -ENOSPC; - - ret = iwl_mvm_send_sta_key(mvm, mvmvif->mcast_sta.sta_id, - mvmvif->ap_wep_key, true, 0, NULL, 0, - key_offset, 0); - if (ret) - return ret; - } - return 0; } @@ -2419,28 +2404,6 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0); - if (mvmvif->ap_wep_key) { - int i; - - if (!__test_and_clear_bit(mvmvif->ap_wep_key->hw_key_idx, - mvm->fw_key_table)) { - IWL_ERR(mvm, "offset %d not used in fw key table.\n", - mvmvif->ap_wep_key->hw_key_idx); - return -ENOENT; - } - - /* track which key was deleted last */ - for (i = 0; i < STA_KEY_MAX_NUM; i++) { - if (mvm->fw_key_deleted[i] < U8_MAX) - mvm->fw_key_deleted[i]++; - } - mvm->fw_key_deleted[mvmvif->ap_wep_key->hw_key_idx] = 0; - ret = __iwl_mvm_remove_sta_key(mvm, mvmvif->mcast_sta.sta_id, - mvmvif->ap_wep_key, true); - if (ret) - return ret; - } - ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id); if (ret) IWL_WARN(mvm, "Failed sending remove station\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 79700c7310a1a..b4d4071b865db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. 
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -394,7 +394,6 @@ struct iwl_mvm_rxq_dup_data { * the BA window. To be used for UAPSD only. * @ptk_pn: per-queue PTK PN data structures * @dup_data: per queue duplicate packet detection data - * @wep_key: used in AP mode. Is a duplicate of the WEP key. * @deferred_traffic_tid_map: indication bitmap of deferred traffic per-TID * @tx_ant: the index of the antenna to use for data tx to this station. Only * used during connection establishment (e.g. for the 4 way handshake @@ -426,8 +425,6 @@ struct iwl_mvm_sta { struct iwl_mvm_key_pn __rcu *ptk_pn[4]; struct iwl_mvm_rxq_dup_data *dup_data; - struct ieee80211_key_conf *wep_key; - u8 reserved_queue; /* Temporary, until the new TLC will control the Tx protection */ -- GitLab From d1967ce641772dd5e27b2a7a97fd625700cc589c Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 10:39:59 +0200 Subject: [PATCH 0119/1861] iwlwifi: add sync_nmi to trans ops Allow modules from outside pcie to call sync_nmi. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 7 +++++++ drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index bbebbf3efd57d..fa750711d889f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -618,6 +618,7 @@ struct iwl_trans_ops { struct iwl_trans_dump_data *(*dump_data)(struct iwl_trans *trans, u32 dump_mask); void (*debugfs_cleanup)(struct iwl_trans *trans); + void (*sync_nmi)(struct iwl_trans *trans); }; /** @@ -1245,6 +1246,12 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans) } +static inline void iwl_trans_sync_nmi(struct iwl_trans *trans) +{ + if (trans->ops->sync_nmi) + trans->ops->sync_nmi(trans); +} + /***************************************************** * transport helper functions *****************************************************/ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index bf8b61a476c5b..59213164f35e3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -1043,7 +1043,7 @@ static inline bool iwl_pcie_dbg_on(struct iwl_trans *trans) void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state); void iwl_trans_pcie_dump_regs(struct iwl_trans *trans); -void iwl_trans_sync_nmi(struct iwl_trans *trans); +void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans); #ifdef CONFIG_IWLWIFI_DEBUGFS int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index fe8269d023def..28d6ae8c9336f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3318,7 +3318,8 @@ static void iwl_trans_pcie_resume(struct iwl_trans *trans) .unref = 
iwl_trans_pcie_unref, \ .dump_data = iwl_trans_pcie_dump_data, \ .d3_suspend = iwl_trans_pcie_d3_suspend, \ - .d3_resume = iwl_trans_pcie_d3_resume + .d3_resume = iwl_trans_pcie_d3_resume, \ + .sync_nmi = iwl_trans_pcie_sync_nmi #ifdef CONFIG_PM_SLEEP #define IWL_TRANS_PM_OPS \ @@ -3637,7 +3638,7 @@ out_no_pci: return ERR_PTR(ret); } -void iwl_trans_sync_nmi(struct iwl_trans *trans) +void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) { unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 88530d9f4a54c..38d1103389871 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -965,7 +965,7 @@ static int iwl_pcie_gen2_send_hcmd_sync(struct iwl_trans *trans, cmd_str); ret = -ETIMEDOUT; - iwl_trans_sync_nmi(trans); + iwl_trans_pcie_sync_nmi(trans); goto cancel; } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 9fbd37d23e851..7be73e2c4681c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1960,7 +1960,7 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, iwl_get_cmd_string(trans, cmd->id)); ret = -ETIMEDOUT; - iwl_trans_sync_nmi(trans); + iwl_trans_pcie_sync_nmi(trans); goto cancel; } -- GitLab From 8625794e363946d153c5bc57ed30ab7616a9995a Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 09:37:22 +0200 Subject: [PATCH 0120/1861] iwlwifi: dbg_ini: in case of region dump failure set memory to 0 In case the driver fails to dump a memory region, and this is the last region, then partial region would be extracted. Solve this by setting the data to zero in case of failure. This will cause dump to be a list of consecutive successful memory regions and trailing zeros with no partial memories extracted. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index f119c49cd39cd..da5d9d79c3728 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1614,6 +1614,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, if (!range) { IWL_ERR(fwrt, "Failed to fill region header: id=%d, type=%d\n", le32_to_cpu(reg->region_id), type); + memset(*data, 0, le32_to_cpu((*data)->len)); return; } @@ -1623,6 +1624,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, if (range_size < 0) { IWL_ERR(fwrt, "Failed to dump region: id=%d, type=%d\n", le32_to_cpu(reg->region_id), type); + memset(*data, 0, le32_to_cpu((*data)->len)); return; } range = range + range_size; -- GitLab From b05d57c9b647b77edf9ac4550c493cf77c3923c7 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Tue, 12 Feb 2019 09:56:49 +0200 Subject: [PATCH 0121/1861] iwlwifi: dbg_ini: fix bad dump size calculation The driver initiates the size value with the size of the struct and then adds the size of the data and checks if the size is zero so size can not be equal to zero. Solve this by getting the data size, check that it is not equal to zero and only then add the struct size. 
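The ordering bug is easiest to see with the driver details stripped away. A distilled before/after sketch (names are hypothetical; the real code uses iwl_fw_ini_get_trigger_len() and sizeof(*dump_file)):

	/* before: the header size is added first, so 'size' is always
	 * non-zero and the empty-trigger check below can never fire */
	size = sizeof(*hdr);
	size += get_payload_len(trigger);
	if (!size)
		return NULL;

	/* after: test the payload length on its own, then account
	 * for the header */
	size = get_payload_len(trigger);
	if (!size)
		return NULL;
	size += sizeof(*hdr);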
Signed-off-by: Shahar S Matityahu Fixes: 7a14c23dcdee ("iwlwifi: dbg: dump data according to the new ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index da5d9d79c3728..b99d38b370143 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1809,12 +1809,12 @@ _iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt, trigger = fwrt->dump.active_trigs[id].trig; - size = sizeof(*dump_file); - size += iwl_fw_ini_get_trigger_len(fwrt, trigger); - + size = iwl_fw_ini_get_trigger_len(fwrt, trigger); if (!size) return NULL; + size += sizeof(*dump_file); + dump_file = vzalloc(size); if (!dump_file) return NULL; -- GitLab From 07d35b4270efd08e27cdad9a8d5fa31158cfc4e8 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Sun, 10 Feb 2019 10:42:16 +0200 Subject: [PATCH 0122/1861] iwlwifi: use sync nmi in case of init flow failure In case of alive interrupt timeout or any failure in the init flow the driver generates FW nmi. The driver assumes that the nmi will generate SW interrupt. This assumption does not hold and leads to faulty behavior in the recovery flow. Solve this by using sync nmi, this way, even if the driver does not receive SW interrupt, it still starts the recovery flow. Also remove the wait queue from iwl_fwrt_stop_device since the driver is handling the SW interrupt synchronously. Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 26 +++---------------- drivers/net/wireless/intel/iwlwifi/fw/init.c | 1 - .../net/wireless/intel/iwlwifi/iwl-trans.h | 7 ----- 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index b99d38b370143..d7380016f1c0d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1944,14 +1944,10 @@ int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt, iwl_dump_error_desc->len = 0; ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc, false, 0); - if (ret) { + if (ret) kfree(iwl_dump_error_desc); - } else { - set_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status); - - /* trigger nmi to halt the fw */ - iwl_force_nmi(fwrt->trans); - } + else + iwl_trans_sync_nmi(fwrt->trans); return ret; } @@ -2491,22 +2487,6 @@ IWL_EXPORT_SYMBOL(iwl_fw_dbg_apply_point); void iwl_fwrt_stop_device(struct iwl_fw_runtime *fwrt) { - /* if the wait event timeout elapses instead of wake up then - * the driver did not receive NMI interrupt and can not assume the FW - * is halted - */ - int ret = wait_event_timeout(fwrt->trans->fw_halt_waitq, - !test_bit(STATUS_FW_WAIT_DUMP, - &fwrt->trans->status), - msecs_to_jiffies(2000)); - if (!ret) { - /* failed to receive NMI interrupt, assuming the FW is stuck */ - set_bit(STATUS_FW_ERROR, &fwrt->trans->status); - - clear_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status); - } - - /* Assuming the op mode mutex is held at this point */ iwl_fw_dbg_collect_sync(fwrt); iwl_trans_stop_device(fwrt->trans); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c index 7adf4e4e841a9..12310e3d2fc5a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/init.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c @@ -76,7 +76,6 @@ void iwl_fw_runtime_init(struct 
iwl_fw_runtime *fwrt, struct iwl_trans *trans, fwrt->ops_ctx = ops_ctx; INIT_DELAYED_WORK(&fwrt->dump.wk, iwl_fw_error_dump_wk); iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir); - init_waitqueue_head(&fwrt->trans->fw_halt_waitq); } IWL_EXPORT_SYMBOL(iwl_fw_runtime_init); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index fa750711d889f..d8690acee40c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -338,7 +338,6 @@ enum iwl_d3_status { * are sent * @STATUS_TRANS_IDLE: the trans is idle - general commands are not to be sent * @STATUS_TRANS_DEAD: trans is dead - avoid any read/write operation - * @STATUS_FW_WAIT_DUMP: if set, wait until cleared before collecting dump */ enum iwl_trans_status { STATUS_SYNC_HCMD_ACTIVE, @@ -351,7 +350,6 @@ enum iwl_trans_status { STATUS_TRANS_GOING_IDLE, STATUS_TRANS_IDLE, STATUS_TRANS_DEAD, - STATUS_FW_WAIT_DUMP, }; static inline int @@ -832,7 +830,6 @@ struct iwl_trans { u32 lmac_error_event_table[2]; u32 umac_error_event_table; unsigned int error_event_table_tlv_status; - wait_queue_head_t fw_halt_waitq; /* pointer to trans specific struct */ /*Ensure that this pointer will always be aligned to sizeof pointer */ @@ -1240,10 +1237,6 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans) /* prevent double restarts due to the same erroneous FW */ if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status)) iwl_op_mode_nic_error(trans->op_mode); - - if (test_and_clear_bit(STATUS_FW_WAIT_DUMP, &trans->status)) - wake_up(&trans->fw_halt_waitq); - } static inline void iwl_trans_sync_nmi(struct iwl_trans *trans) -- GitLab From 0d5bad14226af0712c6eed06059a596fc89a4605 Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 25 Feb 2019 07:35:42 +0200 Subject: [PATCH 0123/1861] iwlwifi: rename structs to fit the new names rename few structs to fit the new marketing names Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 12 ++++++------ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 12 ++++++------ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index fdc56f821b5ac..d5c29298ae3e8 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -235,8 +235,8 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; -const struct iwl_cfg iwl22260_2ax_cfg = { - .name = "Intel(R) Wireless-AX 22260", +const struct iwl_cfg iwl_ax200_cfg_cc = { + .name = "Intel(R) Wi-Fi 6 AX200 160MHz", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -249,7 +249,7 @@ const struct iwl_cfg iwl22260_2ax_cfg = { }; const struct iwl_cfg killer1650x_2ax_cfg = { - .name = "Killer(R) Wireless-AX 1650x Wireless Network Adapter (200NGW)", + .name = "Killer(R) Wi-Fi 6 AX1650x 160MHz Wireless Network Adapter (200NGW)", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -262,7 +262,7 @@ const struct iwl_cfg killer1650x_2ax_cfg = { }; const struct iwl_cfg killer1650w_2ax_cfg = { - .name = "Killer(R) Wireless-AX 1650w Wireless Network Adapter (200D2W)", + .name = "Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)", .fw_name_pre = IWL_CC_A_FW_PRE, IWL_DEVICE_22500, /* @@ -328,7 +328,7 @@ 
const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0 = { }; const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { - .name = "Killer(R) Wireless-AX 1650i Wireless Network Adapter (22560NGW)", + .name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* @@ -340,7 +340,7 @@ const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = { }; const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = { - .name = "Killer(R) Wireless-AX 1650s Wireless Network Adapter (22560D2W)", + .name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)", .fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE, IWL_DEVICE_22500, /* diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index f5f87773667b0..c91b537fa7ffe 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -550,7 +550,7 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; -extern const struct iwl_cfg iwl22260_2ax_cfg; +extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650x_2ax_cfg; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 2b94e4cef56cf..c35e50359d2c1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -953,14 +953,14 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)}, {IWL_PCI_DEVICE(0xA0F0, 0x4070, iwl_ax101_cfg_qu_hr)}, - {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x0088, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x008C, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x0080, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x0084, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x0088, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x4080, iwl22260_2ax_cfg)}, - {IWL_PCI_DEVICE(0x2723, 0x4088, iwl22260_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)}, + {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x1a56, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x1a56, 0x1654, killer1650x_2ax_cfg)}, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 28d6ae8c9336f..1d6f3053f2337 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3561,7 +3561,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && - (trans->cfg != &iwl22260_2ax_cfg || + (trans->cfg != &iwl_ax200_cfg_cc || trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) { u32 hw_status; -- GitLab From 972d8e1377795556024e948357e82532890f2f7d Mon Sep 17 00:00:00 2001 From: Ihab Zhaika Date: Mon, 25 Feb 2019 08:07:03 +0200 Subject: [PATCH 0124/1861] iwlwifi: add new 0x2723/0x2080 card 
for 22000 add new PCI ID 0x2723/0x2080 for 22000 series Signed-off-by: Ihab Zhaika Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index c35e50359d2c1..9f1af8da9dc18 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -959,6 +959,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)}, {IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)}, + {IWL_PCI_DEVICE(0x2723, 0x2080, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)}, {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)}, -- GitLab From 0f0b7e1cc7abf8e1a8b301f2868379d611d05ae2 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 7 Mar 2019 13:09:13 +0100 Subject: [PATCH 0125/1861] x86/tsc: Add option to disable tsc clocksource watchdog Clocksource watchdog has been found responsible for generating latency spikes (in the 10-20 us range) when woken up to check for TSC stability. Add an option to disable it at boot. Signed-off-by: Juri Lelli Signed-off-by: Thomas Gleixner Cc: bigeasy@linutronix.de Cc: linux-rt-users@vger.kernel.org Cc: peterz@infradead.org Cc: bristot@redhat.com Cc: williams@redhat.com Link: https://lkml.kernel.org/r/20190307120913.13168-1-juri.lelli@redhat.com --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ arch/x86/kernel/tsc.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b8ee90bb6447..c4d830003b215 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4703,6 +4703,10 @@ [x86] unstable: mark the TSC clocksource as unstable, this marks the TSC unconditionally unstable at bootup and avoids any further wobbles once the TSC watchdog notices. + [x86] nowatchdog: disable clocksource watchdog. Used + in situations with strict latency requirements (where + interruptions from clocksource watchdog are not + acceptable). turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3fae238340699..aab0c82e0a0d7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -283,6 +283,7 @@ int __init notsc_setup(char *str) __setup("notsc", notsc_setup); static int no_sched_irq_time; +static int no_tsc_watchdog; static int __init tsc_setup(char *str) { @@ -292,6 +293,8 @@ static int __init tsc_setup(char *str) no_sched_irq_time = 1; if (!strcmp(str, "unstable")) mark_tsc_unstable("boot parameter"); + if (!strcmp(str, "nowatchdog")) + no_tsc_watchdog = 1; return 1; } @@ -1349,7 +1352,7 @@ static int __init init_tsc_clocksource(void) if (tsc_unstable) goto unreg; - if (tsc_clocksource_reliable) + if (tsc_clocksource_reliable || no_tsc_watchdog) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) -- GitLab From 77dcc6233e0def71e104d728ab5a39c2fca51127 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Mar 2019 23:48:19 +0100 Subject: [PATCH 0126/1861] mac80211_hwsim: Replace hrtimer tasklet with softirq hrtimer Switch the timer to HRTIMER_MODE_REL_SOFT, which executed the timer callback in softirq context and remove the hrtimer_tasklet. 
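The conversion recipe, used here and in the xfrm patch that follows, is mechanical: the struct tasklet_hrtimer member becomes a plain struct hrtimer initialized in one of the HRTIMER_MODE_*_SOFT modes, whose callback already runs in softirq context, so the tasklet indirection can simply go away. A generic sketch with hypothetical names (the hwsim diff below is the real instance):

	/* setup: replaces tasklet_hrtimer_init(&d->timer, my_cb, ...);
	 * d->timer is now a bare struct hrtimer */
	hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_SOFT);
	d->timer.function = my_cb;

	static enum hrtimer_restart my_cb(struct hrtimer *t)
	{
		struct my_data *d = container_of(t, struct my_data, timer);

		/* ... work that previously ran in the tasklet ... */

		/* periodic timers re-arm by forwarding the expiry and
		 * returning HRTIMER_RESTART instead of restarting the
		 * tasklet timer */
		hrtimer_forward_now(t, ns_to_ktime(d->period_ns));
		return HRTIMER_RESTART;
	}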
Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Johannes Berg Acked-by: David S. Miller Cc: netdev@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: Kalle Valo Link: https://lkml.kernel.org/r/20190301224821.29843-2-bigeasy@linutronix.de --- drivers/net/wireless/mac80211_hwsim.c | 46 ++++++++++++--------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0838af04d681a..2a407a71bcd14 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -521,7 +521,7 @@ struct mac80211_hwsim_data { unsigned int rx_filter; bool started, idle, scanning; struct mutex mutex; - struct tasklet_hrtimer beacon_timer; + struct hrtimer beacon_timer; enum ps_mode { PS_DISABLED, PS_ENABLED, PS_AUTO_POLL, PS_MANUAL_POLL } ps; @@ -1460,7 +1460,7 @@ static void mac80211_hwsim_stop(struct ieee80211_hw *hw) { struct mac80211_hwsim_data *data = hw->priv; data->started = false; - tasklet_hrtimer_cancel(&data->beacon_timer); + hrtimer_cancel(&data->beacon_timer); wiphy_dbg(hw->wiphy, "%s\n", __func__); } @@ -1583,14 +1583,12 @@ static enum hrtimer_restart mac80211_hwsim_beacon(struct hrtimer *timer) { struct mac80211_hwsim_data *data = - container_of(timer, struct mac80211_hwsim_data, - beacon_timer.timer); + container_of(timer, struct mac80211_hwsim_data, beacon_timer); struct ieee80211_hw *hw = data->hw; u64 bcn_int = data->beacon_int; - ktime_t next_bcn; if (!data->started) - goto out; + return HRTIMER_NORESTART; ieee80211_iterate_active_interfaces_atomic( hw, IEEE80211_IFACE_ITER_NORMAL, @@ -1601,12 +1599,9 @@ mac80211_hwsim_beacon(struct hrtimer *timer) bcn_int -= data->bcn_delta; data->bcn_delta = 0; } - - next_bcn = ktime_add(hrtimer_get_expires(timer), - ns_to_ktime(bcn_int * 1000)); - tasklet_hrtimer_start(&data->beacon_timer, next_bcn, HRTIMER_MODE_ABS); -out: - return HRTIMER_NORESTART; + hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer), + ns_to_ktime(bcn_int * NSEC_PER_USEC)); + return HRTIMER_RESTART; } static const char * const hwsim_chanwidths[] = { @@ -1680,15 +1675,15 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) mutex_unlock(&data->mutex); if (!data->started || !data->beacon_int) - tasklet_hrtimer_cancel(&data->beacon_timer); - else if (!hrtimer_is_queued(&data->beacon_timer.timer)) { + hrtimer_cancel(&data->beacon_timer); + else if (!hrtimer_is_queued(&data->beacon_timer)) { u64 tsf = mac80211_hwsim_get_tsf(hw, NULL); u32 bcn_int = data->beacon_int; u64 until_tbtt = bcn_int - do_div(tsf, bcn_int); - tasklet_hrtimer_start(&data->beacon_timer, - ns_to_ktime(until_tbtt * 1000), - HRTIMER_MODE_REL); + hrtimer_start(&data->beacon_timer, + ns_to_ktime(until_tbtt * NSEC_PER_USEC), + HRTIMER_MODE_REL_SOFT); } return 0; @@ -1751,7 +1746,7 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, info->enable_beacon, info->beacon_int); vp->bcn_en = info->enable_beacon; if (data->started && - !hrtimer_is_queued(&data->beacon_timer.timer) && + !hrtimer_is_queued(&data->beacon_timer) && info->enable_beacon) { u64 tsf, until_tbtt; u32 bcn_int; @@ -1759,9 +1754,10 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, tsf = mac80211_hwsim_get_tsf(hw, vif); bcn_int = data->beacon_int; until_tbtt = bcn_int - do_div(tsf, bcn_int); - tasklet_hrtimer_start(&data->beacon_timer, - ns_to_ktime(until_tbtt * 
1000), - HRTIMER_MODE_REL); + + hrtimer_start(&data->beacon_timer, + ns_to_ktime(until_tbtt * NSEC_PER_USEC), + HRTIMER_MODE_REL_SOFT); } else if (!info->enable_beacon) { unsigned int count = 0; ieee80211_iterate_active_interfaces_atomic( @@ -1770,7 +1766,7 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, wiphy_dbg(hw->wiphy, " beaconing vifs remaining: %u", count); if (count == 0) { - tasklet_hrtimer_cancel(&data->beacon_timer); + hrtimer_cancel(&data->beacon_timer); data->beacon_int = 0; } } @@ -2922,9 +2918,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); - tasklet_hrtimer_init(&data->beacon_timer, - mac80211_hwsim_beacon, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&data->beacon_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_SOFT); + data->beacon_timer.function = mac80211_hwsim_beacon; err = ieee80211_register_hw(hw); if (err < 0) { -- GitLab From 671422b2205b5f2c49948b686306b207a5975dd9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Mar 2019 23:48:20 +0100 Subject: [PATCH 0127/1861] xfrm: Replace hrtimer tasklet with softirq hrtimer Switch the timer to HRTIMER_MODE_SOFT, which executed the timer callback in softirq context and remove the hrtimer_tasklet. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: David S. Miller Cc: netdev@vger.kernel.org Cc: Steffen Klassert Cc: Herbert Xu Link: https://lkml.kernel.org/r/20190301224821.29843-3-bigeasy@linutronix.de --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_state.c | 30 ++++++++++++++++++------------ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 85386becbaea2..bbcf0b650b81e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -219,7 +219,7 @@ struct xfrm_state { struct xfrm_stats stats; struct xfrm_lifetime_cur curlft; - struct tasklet_hrtimer mtimer; + struct hrtimer mtimer; struct xfrm_state_offload xso; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1bb971f46fc6f..586b4d656abde 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -434,7 +434,7 @@ EXPORT_SYMBOL(xfrm_state_free); static void ___xfrm_state_destroy(struct xfrm_state *x) { - tasklet_hrtimer_cancel(&x->mtimer); + hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); kfree(x->aead); kfree(x->aalg); @@ -479,8 +479,8 @@ static void xfrm_state_gc_task(struct work_struct *work) static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) { - struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); - struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); + struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer); + enum hrtimer_restart ret = HRTIMER_NORESTART; time64_t now = ktime_get_real_seconds(); time64_t next = TIME64_MAX; int warn = 0; @@ -544,7 +544,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) km_state_expired(x, 0, 0); resched: if (next != TIME64_MAX) { - tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); + hrtimer_forward_now(&x->mtimer, ktime_set(next, 0)); + ret = HRTIMER_RESTART; } goto out; @@ -561,7 +562,7 @@ expired: out: spin_unlock(&x->lock); - return HRTIMER_NORESTART; + return ret; } static void xfrm_replay_timer_handler(struct timer_list *t); @@ -580,8 +581,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bydst); 
INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); - tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, - CLOCK_BOOTTIME, HRTIMER_MODE_ABS); + hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); + x->mtimer.function = xfrm_timer_handler; timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); x->curlft.add_time = ktime_get_real_seconds(); x->lft.soft_byte_limit = XFRM_INF; @@ -1047,7 +1048,9 @@ found: hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; - tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, + ktime_set(net->xfrm.sysctl_acq_expires, 0), + HRTIMER_MODE_REL_SOFT); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); spin_unlock_bh(&net->xfrm.xfrm_state_lock); @@ -1159,7 +1162,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } - tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); @@ -1266,7 +1269,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, x->mark.m = m->m; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, + ktime_set(net->xfrm.sysctl_acq_expires, 0), + HRTIMER_MODE_REL_SOFT); list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, family); @@ -1571,7 +1576,8 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); + hrtimer_start(&x1->mtimer, ktime_set(1, 0), + HRTIMER_MODE_REL_SOFT); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -1610,7 +1616,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL); + hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT); return -EINVAL; } -- GitLab From d7dcf26ff0ffd7b56fe2b09ed7f1867589f3cdf1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Mar 2019 23:48:21 +0100 Subject: [PATCH 0128/1861] softirq: Remove tasklet_hrtimer There are no more users of this interface. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: David S. 
Miller Cc: netdev@vger.kernel.org Link: https://lkml.kernel.org/r/20190301224821.29843-4-bigeasy@linutronix.de --- include/linux/interrupt.h | 25 ------------------- kernel/softirq.c | 51 --------------------------------------- 2 files changed, 76 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 690b238a44d5f..c7eef32e7739e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -668,31 +668,6 @@ extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); -struct tasklet_hrtimer { - struct hrtimer timer; - struct tasklet_struct tasklet; - enum hrtimer_restart (*function)(struct hrtimer *); -}; - -extern void -tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, - enum hrtimer_restart (*function)(struct hrtimer *), - clockid_t which_clock, enum hrtimer_mode mode); - -static inline -void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, - const enum hrtimer_mode mode) -{ - hrtimer_start(&ttimer->timer, time, mode); -} - -static inline -void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) -{ - hrtimer_cancel(&ttimer->timer); - tasklet_kill(&ttimer->tasklet); -} - /* * Autoprobing for irqs: * diff --git a/kernel/softirq.c b/kernel/softirq.c index 10277429ed84f..2c3382378d94c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -573,57 +573,6 @@ void tasklet_kill(struct tasklet_struct *t) } EXPORT_SYMBOL(tasklet_kill); -/* - * tasklet_hrtimer - */ - -/* - * The trampoline is called when the hrtimer expires. It schedules a tasklet - * to run __tasklet_hrtimer_trampoline() which in turn will call the intended - * hrtimer callback, but from softirq context. 
- */ -static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) -{ - struct tasklet_hrtimer *ttimer = - container_of(timer, struct tasklet_hrtimer, timer); - - tasklet_hi_schedule(&ttimer->tasklet); - return HRTIMER_NORESTART; -} - -/* - * Helper function which calls the hrtimer callback from - * tasklet/softirq context - */ -static void __tasklet_hrtimer_trampoline(unsigned long data) -{ - struct tasklet_hrtimer *ttimer = (void *)data; - enum hrtimer_restart restart; - - restart = ttimer->function(&ttimer->timer); - if (restart != HRTIMER_NORESTART) - hrtimer_restart(&ttimer->timer); -} - -/** - * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks - * @ttimer: tasklet_hrtimer which is initialized - * @function: hrtimer callback function which gets called from softirq context - * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) - * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) - */ -void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, - enum hrtimer_restart (*function)(struct hrtimer *), - clockid_t which_clock, enum hrtimer_mode mode) -{ - hrtimer_init(&ttimer->timer, which_clock, mode); - ttimer->timer.function = __hrtimer_tasklet_trampoline; - tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline, - (unsigned long)ttimer); - ttimer->function = function; -} -EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); - void __init softirq_init(void) { int cpu; -- GitLab From 3e2cf62efec52fb49daed437cc486c3cb9a0afa2 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 19 Mar 2019 21:19:52 +0100 Subject: [PATCH 0129/1861] ARM: OMAP1: ams-delta: Fix broken GPIO ID allocation In order to request dynamic allocation of GPIO IDs, a negative number should be passed as a base GPIO ID via platform data. Unfortunately, commit 771e53c4d1a1 ("ARM: OMAP1: ams-delta: Drop board specific global GPIO numbers") didn't follow that rule while switching to dynamically allocated GPIO IDs for Amstrad Delta latches, making their IDs overlap with those already assigned to OMAP GPIO devices. Fix it. Fixes: 771e53c4d1a1 ("ARM: OMAP1: ams-delta: Drop board specific global GPIO numbers") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org Acked-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index be30c3c061b46..1b15d593837ed 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -182,6 +182,7 @@ static struct resource latch1_resources[] = { static struct bgpio_pdata latch1_pdata = { .label = LATCH1_LABEL, + .base = -1, .ngpio = LATCH1_NGPIO, }; @@ -219,6 +220,7 @@ static struct resource latch2_resources[] = { static struct bgpio_pdata latch2_pdata = { .label = LATCH2_LABEL, + .base = -1, .ngpio = LATCH2_NGPIO, }; -- GitLab From 30645307e5d2c8a4caf978558c66121ac91ad17e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Feb 2019 14:20:42 +0100 Subject: [PATCH 0130/1861] ARM: OMAP2+: add missing of_node_put after of_device_is_available Add an of_node_put when a tested device node is not available. The semantic patch that fixes this problem is as follows (http://coccinelle.lip6.fr): // @@ identifier f; local idexpression e; expression x; @@ e = f(...); ... when != of_node_put(e) when != x = e when != e = x when any if (<+...of_device_is_available(e)...+>) { ...
when != of_node_put(e) ( return e; | + of_node_put(e); return ...; ) } // Fixes: e0c827aca0730 ("drm/omap: Populate DSS children in omapdss driver") Signed-off-by: Julia Lawall Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 1444b4b4bd9f8..439e143cad7b5 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -250,8 +250,10 @@ static int __init omapdss_init_of(void) if (!node) return 0; - if (!of_device_is_available(node)) + if (!of_device_is_available(node)) { + of_node_put(node); return 0; + } pdev = of_find_device_by_node(node); -- GitLab From 4f96dc0a3e79ec257a2b082dab3ee694ff88c317 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Mar 2019 12:59:09 +0200 Subject: [PATCH 0131/1861] ARM: dts: am335x-evm: Correct the regulators for the audio codec Correctly map the regulators used by tlv320aic3106. Both 1.8V and 3.3V for the codec are derived from VBAT via fixed regulators. Cc: # v4.14+ Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evm.dts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index dce5be5df97bd..edcff79879e78 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -57,6 +57,24 @@ enable-active-high; }; + /* TPS79501 */ + v1_8d_reg: fixedregulator-v1_8d { + compatible = "regulator-fixed"; + regulator-name = "v1_8d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + /* TPS79501 */ + v3_3d_reg: fixedregulator-v3_3d { + compatible = "regulator-fixed"; + regulator-name = "v3_3d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + matrix_keypad: matrix_keypad0 { compatible = "gpio-matrix-keypad"; debounce-delay-ms = <5>; @@ -499,10 +517,10 @@ status = "okay"; /* Regulators */ - AVDD-supply = <&vaux2_reg>; - IOVDD-supply = <&vaux2_reg>; - DRVDD-supply = <&vaux2_reg>; - DVDD-supply = <&vbat>; + AVDD-supply = <&v3_3d_reg>; + IOVDD-supply = <&v3_3d_reg>; + DRVDD-supply = <&v3_3d_reg>; + DVDD-supply = <&v1_8d_reg>; }; }; -- GitLab From 6691370646e844be98bb6558c024269791d20bd7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Mar 2019 12:59:17 +0200 Subject: [PATCH 0132/1861] ARM: dts: am335x-evmsk: Correct the regulators for the audio codec Correctly map the regulators used by tlv320aic3106. Both 1.8V and 3.3V for the codec are derived from VBAT via fixed regulators.
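On the consumer side nothing needs to change: the codec driver resolves each "<name>-supply" property by supply name through the regulator framework. A condensed sketch of the usual lookup (hypothetical code, not the tlv320aic3x driver itself):

	static const char * const codec_supplies[] = {
		"AVDD", "IOVDD", "DRVDD", "DVDD",
	};

	struct regulator_bulk_data bulk[ARRAY_SIZE(codec_supplies)];
	int i, ret;

	for (i = 0; i < ARRAY_SIZE(bulk); i++)
		bulk[i].supply = codec_supplies[i];

	/* each entry is matched against the node's "<name>-supply"
	 * phandle: AVDD-supply, IOVDD-supply, ... */
	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(bulk), bulk);
	if (!ret)
		ret = regulator_bulk_enable(ARRAY_SIZE(bulk), bulk);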
Cc: # v4.14+ Signed-off-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-evmsk.dts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index b128998097ce7..2c2d8b5b8cf52 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -73,6 +73,24 @@ enable-active-high; }; + /* TPS79518 */ + v1_8d_reg: fixedregulator-v1_8d { + compatible = "regulator-fixed"; + regulator-name = "v1_8d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + /* TPS78633 */ + v3_3d_reg: fixedregulator-v3_3d { + compatible = "regulator-fixed"; + regulator-name = "v3_3d"; + vin-supply = <&vbat>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + leds { pinctrl-names = "default"; pinctrl-0 = <&user_leds_s0>; @@ -501,10 +519,10 @@ status = "okay"; /* Regulators */ - AVDD-supply = <&vaux2_reg>; - IOVDD-supply = <&vaux2_reg>; - DRVDD-supply = <&vaux2_reg>; - DVDD-supply = <&vbat>; + AVDD-supply = <&v3_3d_reg>; + IOVDD-supply = <&v3_3d_reg>; + DRVDD-supply = <&v3_3d_reg>; + DVDD-supply = <&v1_8d_reg>; }; }; -- GitLab From 351f339faa308c1c1461314a18c832239a841ca0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Mar 2019 12:28:03 -0700 Subject: [PATCH 0133/1861] acpi/nfit: Always dump _DSM output payload The dynamic-debug statements for command payload output only get emitted when the command is not ND_CMD_CALL. Move the output payload dumping ahead of the early return path for ND_CMD_CALL. Fixes: 31eca76ba2fc9 ("...whitelisted dimm command marshaling mechanism") Reported-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 5a389a4f4f652..f1ed0befe303d 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -567,6 +567,12 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, goto out; } + dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, + cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, + out_obj->buffer.pointer, + min_t(u32, 128, out_obj->buffer.length), true); + if (call_pkg) { call_pkg->nd_fw_size = out_obj->buffer.length; memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, @@ -585,12 +591,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, return 0; } - dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, - cmd_name, out_obj->buffer.length); - print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, - out_obj->buffer.pointer, - min_t(u32, 128, out_obj->buffer.length), true); - for (i = 0, offset = 0; i < desc->out_num; i++) { u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf, (u32 *) out_obj->buffer.pointer, -- GitLab From 55c1fc0af29a6c1b92f217b7eb7581a882e0c07c Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 03:20:34 -0500 Subject: [PATCH 0134/1861] libnvdimm/namespace: Fix a potential NULL pointer dereference In case kmemdup fails, the fix goes to blk_err to avoid NULL pointer dereference. 
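The bug class is a plain unchecked allocation: kmemdup() returns NULL on failure like any other allocator, and a pointer stored without a check is dereferenced later. Distilled sketch (hypothetical names; the actual fix is in the diff below):

	/* before: a failed kmemdup() silently stores NULL, which is
	 * dereferenced on first use of the name */
	obj->alt_name = kmemdup(name, NAME_LEN, GFP_KERNEL);

	/* after: treat it like any other allocation failure and
	 * unwind through the existing error path */
	obj->alt_name = kmemdup(name, NAME_LEN, GFP_KERNEL);
	if (!obj->alt_name)
		goto err;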
Signed-off-by: Kangjie Lu Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 7849bf1812c47..f293556cbbf6d 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2249,9 +2249,12 @@ static struct device *create_namespace_blk(struct nd_region *nd_region, if (!nsblk->uuid) goto blk_err; memcpy(name, nd_label->name, NSLABEL_NAME_LEN); - if (name[0]) + if (name[0]) { nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL); + if (!nsblk->alt_name) + goto blk_err; + } res = nsblk_add_resource(nd_region, ndd, nsblk, __le64_to_cpu(nd_label->dpa)); if (!res) -- GitLab From fe783516e3016652b74ac92fb8b3fc2b1c0e9d5b Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 21 Mar 2019 15:13:39 -0700 Subject: [PATCH 0135/1861] EDAC, skx, i10nm: Make skx_common.c a pure library The following Kconfig constellations fail randconfig builds: CONFIG_ACPI_NFIT=y CONFIG_EDAC_DEBUG=y CONFIG_EDAC_SKX=m CONFIG_EDAC_I10NM=y or CONFIG_ACPI_NFIT=y CONFIG_EDAC_DEBUG=y CONFIG_EDAC_SKX=y CONFIG_EDAC_I10NM=m with: ... CC [M] drivers/edac/skx_common.o ... .../skx_common.o:.../skx_common.c:672: undefined reference to `__this_module' That is because if one of the two drivers - skx_edac or i10nm_edac - is built-in and the other one is a module, the shared file skx_common.c gets linked into a module object by kbuild. Therefore, when linking that same file into vmlinux, the '__this_module' symbol used in debugfs isn't defined, leading to the above error. Fix it by moving all debugfs code from skx_common.c to both skx_base.c and i10nm_base.c respectively. Thus, skx_common.c doesn't refer to the '__this_module' symbol anymore. Clarify skx_common.c's purpose at the top of the file for future reference, while at it. [ bp: Make text more readable. ] Fixes: d4dc89d069aa ("EDAC, i10nm: Add a driver for Intel 10nm server processors") Reported-by: Arnd Bergmann Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190321221339.GA32323@agluck-desk --- drivers/edac/i10nm_base.c | 52 +++++++++++++++++++++++++++++++++-- drivers/edac/skx_base.c | 50 +++++++++++++++++++++++++++++++++- drivers/edac/skx_common.c | 57 +++++++-------------------------------- drivers/edac/skx_common.h | 8 ------ 4 files changed, 109 insertions(+), 58 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c334fb7c63df5..6f06aec4877c2 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -181,6 +181,54 @@ static struct notifier_block i10nm_mce_dec = { .priority = MCE_PRIO_EDAC, }; +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/i10nm_test/addr. 
+ */ +static struct dentry *i10nm_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_i10nm_debug(void) +{ + i10nm_test = edac_debugfs_create_dir("i10nm_test"); + if (!i10nm_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, i10nm_test, + NULL, &fops_u64_wo)) { + debugfs_remove(i10nm_test); + i10nm_test = NULL; + } +} + +static void teardown_i10nm_debug(void) +{ + debugfs_remove_recursive(i10nm_test); +} +#else +static inline void setup_i10nm_debug(void) {} +static inline void teardown_i10nm_debug(void) {} +#endif /*CONFIG_EDAC_DEBUG*/ + static int __init i10nm_init(void) { u8 mc = 0, src_id = 0, node_id = 0; @@ -249,7 +297,7 @@ static int __init i10nm_init(void) opstate_init(); mce_register_decode_chain(&i10nm_mce_dec); - setup_skx_debug("i10nm_test"); + setup_i10nm_debug(); i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); @@ -262,7 +310,7 @@ fail: static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - teardown_skx_debug(); + teardown_i10nm_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index adae4c848ca1e..a5c8fa3a249ac 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -540,6 +540,54 @@ static struct notifier_block skx_mce_dec = { .priority = MCE_PRIO_EDAC, }; +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/skx_test/addr. + */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_skx_debug(void) +{ + skx_test = edac_debugfs_create_dir("skx_test"); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} + +static void teardown_skx_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +#else +static inline void setup_skx_debug(void) {} +static inline void teardown_skx_debug(void) {} +#endif /*CONFIG_EDAC_DEBUG*/ + /* * skx_init: * make sure we are running on the correct cpu model @@ -619,7 +667,7 @@ static int __init skx_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - setup_skx_debug("skx_test"); + setup_skx_debug(); mce_register_decode_chain(&skx_mce_dec); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 0e96e7b5b0a78..b0dddcfa9baa2 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -1,7 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. - * Originally split out from the skx_edac driver. 
+ * + * Shared code by both skx_edac and i10nm_edac. Originally split out + * from the skx_edac driver. + * + * This file is linked into both skx_edac and i10nm_edac drivers. In + * order to avoid link errors, this file must be like a pure library + * without including symbols and defines which would otherwise conflict, + * when linked once into a module and into a built-in object, at the + * same time. For example, __this_module symbol references when that + * file is being linked into a built-in object. * * Copyright (c) 2018, Intel Corporation. */ @@ -644,48 +652,3 @@ void skx_remove(void) kfree(d); } } - -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/dirname/addr. - */ -static struct dentry *skx_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -void setup_skx_debug(const char *dirname) -{ - skx_test = edac_debugfs_create_dir(dirname); - if (!skx_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, skx_test, - NULL, &fops_u64_wo)) { - debugfs_remove(skx_test); - skx_test = NULL; - } -} - -void teardown_skx_debug(void) -{ - debugfs_remove_recursive(skx_test); -} -#endif /*CONFIG_EDAC_DEBUG*/ diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index d25374e34d4f9..d18fa98669af1 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -141,12 +141,4 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void skx_remove(void); -#ifdef CONFIG_EDAC_DEBUG -void setup_skx_debug(const char *dirname); -void teardown_skx_debug(void); -#else -static inline void setup_skx_debug(const char *dirname) {} -static inline void teardown_skx_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - #endif /* _SKX_COMM_EDAC_H */ -- GitLab From 1bd76ff448a91e35e451a96671b5594e3fffc4bd Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Wed, 13 Mar 2019 10:27:22 -0500 Subject: [PATCH 0136/1861] EDAC, altera: Fix S10 Double Bit Error Notification Stratix10 Double Bit Error Address was always read from SDRAM Address register instead of each device's Address register. To determine which device had the DBE, cycle through the EDAC devices comparing the DBE value to the db_irq value. Once found, report the DBE Address from the device registers as well as the device name. Finally, notify the system via an SMC call and indicate the panic should result in a system reboot. Change a run-time check to a Stratix10 compile-time check for a clean SMC notification. 
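Because a Stratix10 double bit error arrives as a fatal SError, the reporting has to run on the panic path, which is what the panic notifier chain provides. For reference, the generic shape of such a notifier (hypothetical names; s10_edac_dberr_handler in the diff below follows this pattern):

	static int my_dbe_report(struct notifier_block *nb,
				 unsigned long action, void *data)
	{
		/* called from panic(): no sleeping, minimal work only */
		pr_emerg("fatal double bit ECC error\n");
		return NOTIFY_DONE;
	}

	static struct notifier_block my_panic_nb = {
		.notifier_call = my_dbe_report,
	};

	/* registration, typically done at probe time */
	atomic_notifier_chain_register(&panic_notifier_list, &my_panic_nb);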
Fixes: d5fc9125566c ("EDAC, altera: Combine Stratix10 and Arria10 probe functions") Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/1552490842-25440-1-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 69 ++++++++++++++++++++++++++++---------- drivers/edac/altera_edac.h | 20 +++++++++++ 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 1bcf9aea0cdfc..5ff263850cc71 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1930,6 +1930,15 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, goto err_release_group1; } +#ifdef CONFIG_ARCH_STRATIX10 + /* Use IRQ to determine SError origin instead of assigning IRQ */ + rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to parse DB IRQ index\n"); + goto err_release_group1; + } +#else altdev->db_irq = irq_of_parse_and_map(np, 1); if (!altdev->db_irq) { edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n"); @@ -1943,6 +1952,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); goto err_release_group1; } +#endif rc = edac_device_add_device(dci); if (rc) { @@ -2005,6 +2015,10 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = { /************** Stratix 10 EDAC Double Bit Error Handler ************/ #define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) +#ifdef CONFIG_ARCH_STRATIX10 +/* panic routine issues reboot on non-zero panic_timeout */ +extern int panic_timeout; + /* * The double bit error is handled through SError which is fatal. This is * called as a panic notifier to printout ECC error info as part of the panic. @@ -2018,17 +2032,37 @@ static int s10_edac_dberr_handler(struct notifier_block *this, regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, &dberror); regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); - if (dberror & S10_DDR0_IRQ_MASK) { - regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr); - regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, - err_addr); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", - err_addr); + if (dberror & S10_DBE_IRQ_MASK) { + struct list_head *position; + struct altr_edac_device_dev *ed; + struct arm_smccc_res result; + + /* Find the matching DBE in the list of devices */ + list_for_each(position, &edac->a10_ecc_devices) { + ed = list_entry(position, struct altr_edac_device_dev, + next); + if (!(BIT(ed->db_irq) & dberror)) + continue; + + writel(ALTR_A10_ECC_DERRPENA, + ed->base + ALTR_A10_ECC_INTSTAT_OFST); + err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "EDAC: [Fatal DBE on %s @ 0x%08X]\n", + ed->edac_dev_name, err_addr); + break; + } + /* Notify the System through SMC. 
Reboot delay = 1 second */ + panic_timeout = 1; + arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0, + 0, 0, &result); } return NOTIFY_DONE; } +#endif /****************** Arria 10 EDAC Probe Function *********************/ static int altr_edac_a10_probe(struct platform_device *pdev) @@ -2098,16 +2132,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_irq_handler, edac); - if (socfpga_is_a10()) { - edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); - return edac->db_irq; - } - irq_set_chained_handler_and_data(edac->db_irq, - altr_edac_a10_irq_handler, - edac); - } else { +#ifdef CONFIG_ARCH_STRATIX10 + { int dberror, err_addr; edac->panic_notifier.notifier_call = s10_edac_dberr_handler; @@ -2130,6 +2156,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev) S10_SYSMGR_UE_ADDR_OFST, 0); } } +#else + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; + } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, edac); +#endif for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index f8664bac9fa82..60513f201ffb4 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -289,6 +289,7 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 /************* Stratix10 Defines **************/ +#define ALTR_S10_DERR_ADDRA_OFST 0x2C /* Stratix10 ECC Manager Defines */ #define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 @@ -299,6 +300,7 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) +#define S10_DBE_IRQ_MASK 0x3FE /* Define ECC Block Offsets for peripherals */ #define ECC_BLK_ADDRESS_OFST 0x40 @@ -435,4 +437,22 @@ struct altr_arria10_edac { #define INTEL_SIP_SMC_REG_WRITE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) +/* + * Request INTEL_SIP_SMC_ECC_DBE + * + * Sync call used by service driver at EL1 alert EL3 that a Double Bit + * ECC error has occurred. + * + * Call register usage: + * a0 INTEL_SIP_SMC_ECC_DBE + * a1 SysManager Double Bit Error value + * a2-7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK + */ +#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13 +#define INTEL_SIP_SMC_ECC_DBE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) + #endif /* #ifndef _ALTERA_EDAC_H */ -- GitLab From e1e41b6ce5f9c1a80bf4f2404ec5ab11c6c5a2ad Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 18 Mar 2019 20:55:56 +0100 Subject: [PATCH 0137/1861] timekeeping: Consistently use unsigned int for seqcount snapshot The timekeeping code uses a random mix of "unsigned long" and "unsigned int" for the seqcount snapshots (ratio 14:12). Since the seqlock.h API is entirely based on unsigned int, use that throughout. 
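The pattern being converted here is the classic seqcount read-retry loop. As a minimal sketch (illustrative only, with made-up example variables, not part of the patch), the snapshot is typed unsigned int to match the seqlock.h API:

#include <linux/seqlock.h>
#include <linux/types.h>

static seqcount_t example_seq = SEQCNT_ZERO(example_seq);
static u64 example_value;	/* hypothetical value the seqcount protects */

/* The reader retries until it observes a snapshot that no writer tore.
 * read_seqcount_begin() returns unsigned int and read_seqcount_retry()
 * takes unsigned int, so the snapshot variable should match.
 */
static u64 example_read(void)
{
	unsigned int seq;
	u64 val;

	do {
		seq = read_seqcount_begin(&example_seq);
		val = example_value;
	} while (read_seqcount_retry(&example_seq, seq));

	return val;
}

Holding the snapshot in an unsigned long is harmless but dishonest about the width the API actually uses, which is what the patch below cleans up.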
Signed-off-by: Rasmus Villemoes Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: John Stultz Cc: Stephen Boyd Link: https://lkml.kernel.org/r/20190318195557.20773-1-linux@rasmusvillemoes.dk --- kernel/time/jiffies.c | 2 +- kernel/time/sched_clock.c | 4 ++-- kernel/time/tick-common.c | 2 +- kernel/time/tick-sched.c | 3 ++- kernel/time/timekeeping.c | 18 +++++++++--------- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index dc1b6f1929f9b..95f8f3304c196 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -63,7 +63,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { - unsigned long seq; + unsigned int seq; u64 ret; do { diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 094b82ca95e52..16b80c2b4fe81 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -94,7 +94,7 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) unsigned long long notrace sched_clock(void) { u64 cyc, res; - unsigned long seq; + unsigned int seq; struct clock_read_data *rd; do { @@ -267,7 +267,7 @@ void __init generic_sched_clock_init(void) */ static u64 notrace suspended_sched_clock_read(void) { - unsigned long seq = raw_read_seqcount(&cd.seq); + unsigned int seq = raw_read_seqcount(&cd.seq); return cd.read_data[seq & 1].epoch_cyc; } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 529143b4c8d2a..561641b2153f0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -149,7 +149,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) !tick_broadcast_oneshot_active()) { clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); } else { - unsigned long seq; + unsigned int seq; ktime_t next; do { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0be..b50f6f22c88e5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -645,7 +645,8 @@ static inline bool local_timer_softirq_pending(void) static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; - unsigned long seq, basejiff; + unsigned long basejiff; + unsigned int seq; /* Read jiffies and the time when jiffies were updated last */ do { diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f986e1918d129..540145da33dac 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -720,7 +720,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) void ktime_get_real_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned int seq; u64 nsecs; WARN_ON(timekeeping_suspended); @@ -829,7 +829,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset); ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) { ktime_t *offset = offsets[offs]; - unsigned long seq; + unsigned int seq; ktime_t tconv; do { @@ -960,7 +960,7 @@ time64_t __ktime_get_real_seconds(void) void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned int seq; ktime_t base_raw; ktime_t base_real; u64 nsec_raw; @@ -1122,7 +1122,7 @@ int get_device_system_crosststamp(int (*get_time_fn) ktime_t base_real, base_raw; u64 nsec_real, nsec_raw; u8 cs_was_changed_seq; - unsigned long seq; + unsigned int seq; bool do_interp; int ret; @@ -1409,7 +1409,7 @@ int 
timekeeping_notify(struct clocksource *clock) void ktime_get_raw_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned int seq; u64 nsecs; do { @@ -1431,7 +1431,7 @@ EXPORT_SYMBOL(ktime_get_raw_ts64); int timekeeping_valid_for_hres(void) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned int seq; int ret; do { @@ -1450,7 +1450,7 @@ int timekeeping_valid_for_hres(void) u64 timekeeping_max_deferment(void) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned int seq; u64 ret; do { @@ -2150,7 +2150,7 @@ EXPORT_SYMBOL_GPL(getboottime64); void ktime_get_coarse_real_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); @@ -2164,7 +2164,7 @@ void ktime_get_coarse_ts64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now, mono; - unsigned long seq; + unsigned int seq; do { seq = read_seqcount_begin(&tk_core.seq); -- GitLab From e0ceeae708cebf22c990c3d703a4ca187dc837f5 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Sat, 23 Mar 2019 23:42:20 +0800 Subject: [PATCH 0138/1861] x86/CPU/hygon: Fix phys_proc_id calculation logic for multi-die processors The Hygon family 18h multi-die processor platform supports 1, 2 or 4-Dies per socket. The topology looks like this: System View (with 1-Die 2-Socket): |------------| ------ ----- SOCKET0 | D0 | | D1 | SOCKET1 ------ ----- System View (with 2-Die 2-socket): -------------------- | -------------|------ | | | | ------------ ------------ SOCKET0 | D1 -- D0 | | D3 -- D2 | SOCKET1 ------------ ------------ System View (with 4-Die 2-Socket) : -------------------- | -------------|------ | | | | ------------ ------------ | D1 -- D0 | | D7 -- D6 | | | \/ | | | | \/ | | SOCKET0 | | /\ | | | | /\ | | SOCKET1 | D2 -- D3 | | D4 -- D5 | ------------ ------------ | | | | ------|------------| | -------------------- Currently phys_proc_id = initial_apicid >> bits calculates the physical processor ID from the initial_apicid by shifting *bits*. However, this does not work for 1-Die and 2-Die 2-socket systems. According to document [1] section 2.1.11.1, the bits is the value of CPUID_Fn80000008_ECX[12:15]. The possible values are 4, 5 or 6 which mean: 4 - 1 die 5 - 2 dies 6 - 3/4 dies. Hygon programs the initial ApicId the same way as AMD. The ApicId is read from CPUID_Fn00000001_EBX (see section 2.1.11.1 of referrence [1]) and the definition is as below (see section 2.1.10.2.1.3 of [1]): ------------------------------------------------- Bit | 6 | 5 4 | 3 | 2 1 0 | |-----------|---------|--------|----------------| IDs | Socket ID | Node ID | CCX ID | Core/Thread ID | ------------------------------------------------- So for 3/4-Die configurations, the bits variable is 6, which is the same as the ApicID definition field. For 1-Die and 2-Die configurations, bits is 4 or 5, which will cause the right shifted result to not be exactly the value of socket ID. However, the socket ID should be obtained from ApicId[6]. To fix the problem and match the ApicID field definition, set the shift bits to 6 for all Hygon family 18h multi-die CPUs. Because AMD doesn't have 2-Socket systems with 1-Die/2-Die processors (see reference [2]), this doesn't need to be changed on the AMD side but only for Hygon. 
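The arithmetic is easy to check in isolation. This stand-alone sketch (hypothetical ApicId value, plain user-space C rather than kernel code) contrasts the two derivations for a 2-Die-per-socket part, where bits is 5:

#include <stdio.h>

#define APICID_SOCKET_ID_BIT	6

int main(void)
{
	/* Hypothetical ApicId of a core on socket 1 of a 2-Die part:
	 * Socket ID = bit 6, Node ID = bits 5:4, CCX ID = bit 3,
	 * Core/Thread ID = bits 2:0.
	 */
	unsigned int apicid = 0x40;
	unsigned int bits = 5;	/* CPUID_Fn80000008_ECX[12:15] for 2 dies */

	/* Old derivation: shifts part of the node ID into the result. */
	printf("apicid >> bits = %u (wrong, should be 1)\n", apicid >> bits);

	/* Fixed derivation: the socket ID is simply ApicId[6]. */
	printf("apicid >> %d = %u (socket 1)\n",
	       APICID_SOCKET_ID_BIT, apicid >> APICID_SOCKET_ID_BIT);

	return 0;
}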
References: [1] https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf [2] https://www.amd.com/en/products/specifications/processors [bp: heavily massage commit message. ] Signed-off-by: Pu Wen Signed-off-by: Borislav Petkov Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Thomas Lendacky Cc: Yazen Ghannam Cc: x86-ml Link: https://lkml.kernel.org/r/1553355740-19999-1-git-send-email-puwen@hygon.cn --- arch/x86/kernel/cpu/hygon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index cf25405444ab3..415621ddb8a23 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -19,6 +19,8 @@ #include "cpu.h" +#define APICID_SOCKET_ID_BIT 6 + /* * nodes_per_socket: Stores the number of nodes per socket. * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8] @@ -87,6 +89,9 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); + /* Socket ID is ApicId[6] for these processors. */ + c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; + cacheinfo_hygon_init_llc_id(c, cpu, node_id); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; -- GitLab From 1b72d43237980eab9b6ae6bb8181e51c840377e6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Mar 2019 16:39:20 +0100 Subject: [PATCH 0139/1861] tick: Remove outgoing CPU from broadcast masks Valentin reported that unplugging a CPU occasionally results in a warning in the tick broadcast code which is triggered when an offline CPU is in the broadcast mask. This happens because the outgoing CPU is not removing itself from the broadcast masks, especially not from the broadcast_force_mask. The removal happens on the control CPU after the outgoing CPU is dead. It's a long standing issue, but the warning is harmless. Rework the hotplug mechanism so that the outgoing CPU removes itself from the broadcast masks after disabling interrupts and removing itself from the online mask. 
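Condensed from the diff below (a sketch, not a literal quote), the new ordering on the outgoing CPU looks like this:

/*
 * take_cpu_down()            stop-machine context, interrupts disabled,
 *                            CPU already cleared from the online mask
 *   tick_handover_do_timer();   // give up timekeeping duties
 *   tick_offline_cpu(cpu);      // NEW: takes clockevents_lock and calls
 *                               // tick_broadcast_offline() to clear this
 *                               // CPU from all broadcast masks
 *   stop_machine_park(cpu);
 *
 * Previously the masks were only cleared later, from the control CPU,
 * via tick_cleanup_dead_cpu(), leaving a window in which an offline CPU
 * could still sit in broadcast_force_mask and trigger the warning.
 */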
Reported-by: Valentin Schneider Signed-off-by: Thomas Gleixner Tested-by: Valentin Schneider Cc: Frederic Weisbecker Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1903211540180.1784@nanos.tec.linutronix.de --- include/linux/tick.h | 6 ++++++ kernel/cpu.c | 2 ++ kernel/time/clockevents.c | 18 ++++++++++++++-- kernel/time/tick-broadcast.c | 40 +++++++++++++++++------------------- kernel/time/tick-internal.h | 10 +++++---- 5 files changed, 49 insertions(+), 27 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4d..76acb48acdb76 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -68,6 +68,12 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU) +extern void tick_offline_cpu(unsigned int cpu); +#else +static inline void tick_offline_cpu(unsigned int cpu) { } +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else diff --git a/kernel/cpu.c b/kernel/cpu.c index 025f419d16f68..f69ba38573c23 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -844,6 +844,8 @@ static int take_cpu_down(void *_param) /* Give up timekeeping duties */ tick_handover_do_timer(); + /* Remove CPU from timer broadcasting */ + tick_offline_cpu(cpu); /* Park the stopper thread */ stop_machine_park(cpu); return 0; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 5e77662dd2d90..f5490222e134a 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -611,6 +611,22 @@ void clockevents_resume(void) } #ifdef CONFIG_HOTPLUG_CPU + +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +/** + * tick_offline_cpu - Take CPU out of the broadcast mechanism + * @cpu: The outgoing CPU + * + * Called on the outgoing CPU after it took itself offline. 
+ */ +void tick_offline_cpu(unsigned int cpu) +{ + raw_spin_lock(&clockevents_lock); + tick_broadcast_offline(cpu); + raw_spin_unlock(&clockevents_lock); +} +# endif + /** * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu */ @@ -621,8 +637,6 @@ void tick_cleanup_dead_cpu(int cpu) raw_spin_lock_irqsave(&clockevents_lock, flags); - tick_shutdown_broadcast_oneshot(cpu); - tick_shutdown_broadcast(cpu); tick_shutdown(cpu); /* * Unregister the clock event devices which were diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ee834d4fb8140..0283523de045c 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -36,10 +36,12 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); +static void tick_broadcast_oneshot_offline(unsigned int cpu); #else static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } +static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { } #endif /* @@ -433,27 +435,29 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) } #ifdef CONFIG_HOTPLUG_CPU -/* - * Remove a CPU from broadcasting - */ -void tick_shutdown_broadcast(unsigned int cpu) +static void tick_shutdown_broadcast(void) { - struct clock_event_device *bc; - unsigned long flags; - - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - - bc = tick_broadcast_device.evtdev; - cpumask_clear_cpu(cpu, tick_broadcast_mask); - cpumask_clear_cpu(cpu, tick_broadcast_on); + struct clock_event_device *bc = tick_broadcast_device.evtdev; if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { if (bc && cpumask_empty(tick_broadcast_mask)) clockevents_shutdown(bc); } +} - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +/* + * Remove a CPU from broadcasting + */ +void tick_broadcast_offline(unsigned int cpu) +{ + raw_spin_lock(&tick_broadcast_lock); + cpumask_clear_cpu(cpu, tick_broadcast_mask); + cpumask_clear_cpu(cpu, tick_broadcast_on); + tick_broadcast_oneshot_offline(cpu); + tick_shutdown_broadcast(); + raw_spin_unlock(&tick_broadcast_lock); } + #endif void tick_suspend_broadcast(void) @@ -950,14 +954,10 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) } /* - * Remove a dead CPU from broadcasting + * Remove a dying CPU from broadcasting */ -void tick_shutdown_broadcast_oneshot(unsigned int cpu) +static void tick_broadcast_oneshot_offline(unsigned int cpu) { - unsigned long flags; - - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - /* * Clear the broadcast masks for the dead cpu, but do not stop * the broadcast device! 
@@ -965,8 +965,6 @@ void tick_shutdown_broadcast_oneshot(unsigned int cpu) cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); - - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } #endif diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index e277284c2831c..7b24961367292 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -64,7 +64,6 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_shutdown_broadcast(unsigned int cpu); extern void tick_suspend_broadcast(void); extern void tick_resume_broadcast(void); extern bool tick_resume_check_broadcast(void); @@ -78,7 +77,6 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev) static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_shutdown_broadcast(unsigned int cpu) { } static inline void tick_suspend_broadcast(void) { } static inline void tick_resume_broadcast(void) { } static inline bool tick_resume_check_broadcast(void) { return false; } @@ -128,19 +126,23 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } #endif /* !(BROADCAST && ONESHOT) */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU) +extern void tick_broadcast_offline(unsigned int cpu); +#else +static inline void tick_broadcast_offline(unsigned int cpu) { } +#endif + /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL extern void tick_nohz_init(void); -- GitLab From 9308fd4074551f222f30322d1ee8c5aff18e9747 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 22 Mar 2019 20:29:00 +0000 Subject: [PATCH 0140/1861] x86/MCE: Group AMD function prototypes in <asm/mce.h> There are two groups of "ifdef CONFIG_X86_MCE_AMD" function prototypes in <asm/mce.h>. Merge these two groups. No functional change. [ bp: align vertically. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Arnd Bergmann Cc: "clemej@gmail.com" Cc: "H.
Peter Anvin" Cc: Ingo Molnar Cc: Pu Wen Cc: Qiuxu Zhuo Cc: "rafal@milecki.pl" Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: x86-ml Link: https://lkml.kernel.org/r/20190322202848.20749-3-Yazen.Ghannam@amd.com --- arch/x86/include/asm/mce.h | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 22d05e3835f0b..dc2d4b206ab7e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -210,16 +210,6 @@ static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif -#ifdef CONFIG_X86_MCE_AMD -void mce_amd_feature_init(struct cpuinfo_x86 *c); -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -#else -static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } -static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; -#endif - -static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); @@ -345,12 +335,19 @@ extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); -#else +void mce_amd_feature_init(struct cpuinfo_x86 *c); +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); -static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; -static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; -static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +#else +static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; +static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +static inline int +umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif +static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } + #endif /* _ASM_X86_MCE_H */ -- GitLab From bfb57a91c2cb497c3780ed2e08f85d038efd0b7b Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 24 Mar 2019 18:07:02 +0200 Subject: [PATCH 0141/1861] habanalabs: remove low credit limit of DMA #0 Because DMA #0 is now used by the user, remove the limitation of credits from this channel. Without this patch, this channel is pretty much unusable due to its very low bandwidth configuration. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ea979ebd62fb8..3c509e19d69dc 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1688,12 +1688,11 @@ static void goya_init_golden_registers(struct hl_device *hdev) /* * Workaround for H2 #HW-23 bug - * Set DMA max outstanding read requests to 240 on DMA CH 1. Set it - * to 16 on KMD DMA - * We need to limit only these DMAs because the user can only read + * Set DMA max outstanding read requests to 240 on DMA CH 1. + * This limitation is still large enough to not affect Gen4 bandwidth. 
+ * We need to only limit that DMA channel because the user can only read * from Host using DMA CH 1 */ - WREG32(mmDMA_CH_0_CFG0, 0x0fff0010); WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0); goya->hw_cap_initialized |= HW_CAP_GOLDEN; @@ -3693,7 +3692,7 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, * WA for HW-23. * We can't allow user to read from Host using QMANs other than 1. */ - if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 && + if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 && hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr), le32_to_cpu(user_dma_pkt->tsize), hdev->asic_prop.va_space_host_start_address, -- GitLab From fccfb9ce70ed4ea7a145f77b86de62e38178517f Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Tue, 19 Mar 2019 12:47:00 +0200 Subject: [PATCH 0142/1861] iio: ad_sigma_delta: select channel when reading register The desired channel has to be selected in order to correctly fill the buffer with the corresponding data. The `ad_sd_write_reg()` already does this, but for the `ad_sd_read_reg_raw()` this was omitted. Fixes: af3008485ea03 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Dragos Bogdan Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index ff5f2da2e1b13..54d9978b27405 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -121,6 +121,7 @@ static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta, if (sigma_delta->info->has_registers) { data[0] = reg << sigma_delta->info->addr_shift; data[0] |= sigma_delta->info->read_mask; + data[0] |= sigma_delta->comm; spi_message_add_tail(&t[0], &m); } spi_message_add_tail(&t[1], &m); -- GitLab From d6b87eaf10bd061914f6d277d7428b3285d8850e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Mar 2019 13:09:18 +0100 Subject: [PATCH 0143/1861] tick/sched: Update tick_sched struct documentation Adapt the documentation order of struct members to the effective order of struct members and add missing descriptions. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: fweisbec@gmail.com Cc: peterz@infradead.org Link: https://lkml.kernel.org/r/20190321120921.16463-2-anna-maria@linutronix.de --- kernel/time/tick-sched.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 6de959a854b2c..4fb06527cf64f 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -24,12 +24,19 @@ enum tick_nohz_mode { * struct tick_sched - sched tick emulation and no idle tick control/stats * @sched_timer: hrtimer to schedule the periodic tick in high * resolution mode + * @check_clocks: Notification mechanism about clocksource changes + * @nohz_mode: Mode - one state of tick_nohz_mode + * @inidle: Indicator that the CPU is in the tick idle mode + * @tick_stopped: Indicator that the idle tick has been stopped + * @idle_active: Indicator that the CPU is actively in the tick idle mode; + * it is resetted during irq handling phases. + * @do_timer_lst: CPU was the last one doing do_timer before going idle + * @got_idle_tick: Tick timer function has run with @inidle set * @last_tick: Store the last tick expiry time when the tick * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline * when the CPU returns from nohz sleep. 
* @next_tick: Next tick to be fired when in dynticks mode. - * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls * @idle_sleeps: Number of idle calls, where the sched tick was stopped @@ -40,8 +47,8 @@ enum tick_nohz_mode { * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped) * @timer_expires_base: Base time clock monotonic for @timer_expires - * @do_timer_lst: CPU was the last one doing do_timer before going idle - * @got_idle_tick: Tick timer function has run with @inidle set + * @next_timer: Expiry time of next expiring timer for debugging purpose only + * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick */ struct tick_sched { struct hrtimer sched_timer; -- GitLab From dc1e7dc5ac6254ba0502323381a7ec847e408f1d Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Mar 2019 13:09:19 +0100 Subject: [PATCH 0144/1861] timer: Move trace point to get proper index When placing the timer_start trace point before the timer wheel bucket index is calculated, the index information in the trace point is useless. It is not possible to simply move the debug_activate() call after the index calculation, because debug_object_activate() needs to be called before touching the object. Therefore split debug_activate() and move the trace point into enqueue_timer() after the new index has been calculated. The debug_object_activate() call remains at the original place. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: fweisbec@gmail.com Cc: peterz@infradead.org Cc: Steven Rostedt Link: https://lkml.kernel.org/r/20190321120921.16463-3-anna-maria@linutronix.de --- kernel/time/timer.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2fce056f8a495..8d7918ae4d0c3 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -536,6 +536,8 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, hlist_add_head(&timer->entry, base->vectors + idx); __set_bit(idx, base->pending_map); timer_set_idx(timer, idx); + + trace_timer_start(timer, timer->expires, timer->flags); } static void @@ -757,13 +759,6 @@ static inline void debug_init(struct timer_list *timer) trace_timer_init(timer); } -static inline void -debug_activate(struct timer_list *timer, unsigned long expires) -{ - debug_timer_activate(timer); - trace_timer_start(timer, expires, timer->flags); -} - static inline void debug_deactivate(struct timer_list *timer) { debug_timer_deactivate(timer); @@ -1037,7 +1032,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option } } - debug_activate(timer, expires); + debug_timer_activate(timer); timer->expires = expires; /* @@ -1171,7 +1166,7 @@ void add_timer_on(struct timer_list *timer, int cpu) } forward_timer_base(base); - debug_activate(timer, timer->expires); + debug_timer_activate(timer); internal_add_timer(base, timer); raw_spin_unlock_irqrestore(&base->lock, flags); } -- GitLab From 6849cbb0f9a8dbc1ba56e9abc6955613103e01e3 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Mar 2019 13:09:20 +0100 Subject: [PATCH 0145/1861] timer/trace: Replace deprecated vsprintf pointer extension %pf by %ps Since commit 04b8eb7a4ccd ("symbol lookup: introduce dereference_symbol_descriptor()") 
%pf is deprecated, because %ps is smart enough to handle function pointer dereference on platforms where such a dereference is required. While at it add proper line breaks to stay in the 80 character limit. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: fweisbec@gmail.com Cc: peterz@infradead.org Cc: Steven Rostedt Link: https://lkml.kernel.org/r/20190321120921.16463-4-anna-maria@linutronix.de --- include/trace/events/timer.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index a57e4ee989d62..da975d69c4536 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -73,7 +73,7 @@ TRACE_EVENT(timer_start, __entry->flags = flags; ), - TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] cpu=%u idx=%u flags=%s", + TP_printk("timer=%p function=%ps expires=%lu [timeout=%ld] cpu=%u idx=%u flags=%s", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now, __entry->flags & TIMER_CPUMASK, @@ -105,7 +105,8 @@ TRACE_EVENT(timer_expire_entry, __entry->function = timer->function; ), - TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now) + TP_printk("timer=%p function=%ps now=%lu", + __entry->timer, __entry->function, __entry->now) ); /** @@ -210,7 +211,7 @@ TRACE_EVENT(hrtimer_start, __entry->mode = mode; ), - TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu " + TP_printk("hrtimer=%p function=%ps expires=%llu softexpires=%llu " "mode=%s", __entry->hrtimer, __entry->function, (unsigned long long) __entry->expires, (unsigned long long) __entry->softexpires, @@ -243,7 +244,8 @@ TRACE_EVENT(hrtimer_expire_entry, __entry->function = hrtimer->function; ), - TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, + TP_printk("hrtimer=%p function=%ps now=%llu", + __entry->hrtimer, __entry->function, (unsigned long long) __entry->now) ); -- GitLab From f28d3d5346e97e60c81f933ac89ccf015430e5cf Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Mar 2019 13:09:21 +0100 Subject: [PATCH 0146/1861] timer/trace: Improve timer tracing Timers are added to the timer wheel off by one. This is required in case a timer is queued directly before incrementing jiffies to prevent early timer expiry. When reading a timer trace and relying only on the expiry time of the timer in the timer_start trace point and on the now in the timer_expiry_entry trace point, it seems that the timer fires late. With the current timer_expiry_entry trace point information only now=jiffies is printed but not the value of base->clk. This makes it impossible to draw a conclusion to the index of base->clk and makes it impossible to examine timer problems without additional trace points. Therefore add the base->clk value to the timer_expire_entry trace point, to be able to calculate the index the timer base is located at during collecting expired timers. 
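To illustrate what the extra field buys a trace consumer, here is a hypothetical post-processing sketch; the sample values are invented and report() is not part of the kernel or of this patch:

#include <stdio.h>

/* For one timer occurrence: `expires` comes from the timer_start event,
 * `baseclk` and `now` from the extended timer_expire_entry event.
 */
static void report(unsigned long expires, unsigned long baseclk,
		   unsigned long now)
{
	/* How far the wheel clock had advanced past the programmed expiry
	 * when the bucket was collected; for higher wheel levels this
	 * reflects bucket granularity, not a late interrupt.
	 */
	printf("wheel slack: %ld jiffies\n", (long)(baseclk - expires));

	/* What the old trace suggested: apparent lateness vs jiffies. */
	printf("apparent lateness: %ld jiffies\n", (long)(now - expires));
}

int main(void)
{
	report(1000, 1008, 1009);	/* invented sample values */
	return 0;
}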
Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Cc: fweisbec@gmail.com Cc: peterz@infradead.org Cc: Steven Rostedt Link: https://lkml.kernel.org/r/20190321120921.16463-5-anna-maria@linutronix.de --- include/trace/events/timer.h | 11 +++++++---- kernel/time/timer.c | 17 +++++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index da975d69c4536..b7a904825e7df 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -89,24 +89,27 @@ TRACE_EVENT(timer_start, */ TRACE_EVENT(timer_expire_entry, - TP_PROTO(struct timer_list *timer), + TP_PROTO(struct timer_list *timer, unsigned long baseclk), - TP_ARGS(timer), + TP_ARGS(timer, baseclk), TP_STRUCT__entry( __field( void *, timer ) __field( unsigned long, now ) __field( void *, function) + __field( unsigned long, baseclk ) ), TP_fast_assign( __entry->timer = timer; __entry->now = jiffies; __entry->function = timer->function; + __entry->baseclk = baseclk; ), - TP_printk("timer=%p function=%ps now=%lu", - __entry->timer, __entry->function, __entry->now) + TP_printk("timer=%p function=%ps now=%lu baseclk=%lu", + __entry->timer, __entry->function, __entry->now, + __entry->baseclk) ); /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8d7918ae4d0c3..a9b1bbc2d88d9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1293,7 +1293,9 @@ int del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(del_timer_sync); #endif -static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *)) +static void call_timer_fn(struct timer_list *timer, + void (*fn)(struct timer_list *), + unsigned long baseclk) { int count = preempt_count(); @@ -1316,7 +1318,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list */ lock_map_acquire(&lockdep_map); - trace_timer_expire_entry(timer); + trace_timer_expire_entry(timer, baseclk); fn(timer); trace_timer_expire_exit(timer); @@ -1337,6 +1339,13 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list static void expire_timers(struct timer_base *base, struct hlist_head *head) { + /* + * This value is required only for tracing. base->clk was + * incremented directly before expire_timers was called. But expiry + * is related to the old base->clk value. + */ + unsigned long baseclk = base->clk - 1; + while (!hlist_empty(head)) { struct timer_list *timer; void (*fn)(struct timer_list *); @@ -1350,11 +1359,11 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); - call_timer_fn(timer, fn); + call_timer_fn(timer, fn, baseclk); raw_spin_lock(&base->lock); } else { raw_spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn); + call_timer_fn(timer, fn, baseclk); raw_spin_lock_irq(&base->lock); } } -- GitLab From 59c39840f5abf4a71e1810a8da71aaccd6c17d26 Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Sun, 24 Mar 2019 07:57:04 -0700 Subject: [PATCH 0147/1861] genirq: Prevent use-after-free and work list corruption When irq_set_affinity_notifier() replaces the notifier, the reference count on the old notifier is dropped, which causes it to be freed. But nothing ensures that the old notifier is no longer queued in the work list. If it is still queued, this results in a use-after-free and possibly in work list corruption. Ensure that the work is canceled before the reference is dropped.
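The rule generalizes to any refcounted object that embeds a work item. A minimal sketch of the safe teardown order (illustrative, with hypothetical names, not taken from the patch):

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct notify_obj {			/* hypothetical object */
	struct work_struct work;
	struct kref kref;
};

static void notify_obj_release(struct kref *kref)
{
	kfree(container_of(kref, struct notify_obj, kref));
}

static void notify_obj_teardown(struct notify_obj *obj)
{
	/* Wait for any queued or running work to finish first; only then
	 * may the reference that keeps the object alive be dropped.
	 */
	cancel_work_sync(&obj->work);
	kref_put(&obj->kref, notify_obj_release);
}

Dropping the reference first leaves a window in which the workqueue still dereferences freed memory, which is exactly the window the patch above closes.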
Signed-off-by: Prasad Sodagudi Signed-off-by: Thomas Gleixner Cc: marc.zyngier@arm.com Link: https://lkml.kernel.org/r/1553439424-6529-1-git-send-email-psodagud@codeaurora.org --- kernel/irq/manage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1401afa0d58a4..53a0813921158 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -357,8 +357,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) desc->affinity_notify = notify; raw_spin_unlock_irqrestore(&desc->lock, flags); - if (old_notify) + if (old_notify) { + cancel_work_sync(&old_notify->work); kref_put(&old_notify->kref, old_notify->release); + } return 0; } -- GitLab From 4ba104f468bbfc27362c393815d03aa18fb7a20f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: [PATCH 0148/1861] batman-adv: Reduce claim hash refcnt only for removed entry The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_bla_del_claim is not itself protected in any way against another function modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such a situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free.
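The same recipe recurs in the two patches that follow. Stripped of batman-adv specifics, its shape is the following sketch, assuming a batadv-style hash API and a hypothetical entry_put() helper:

#include <linux/list.h>

struct entry {				/* hypothetical hashed object */
	struct hlist_node hash_entry;
};

void entry_put(struct entry *e);	/* hypothetical: drop one reference */

static void put_removed(struct hlist_node *removed_node)
{
	struct entry *removed;

	/* NULL means another context unlinked the entry first; the hash's
	 * reference was dropped there, not here.
	 */
	if (!removed_node)
		return;

	/* Drop the reference of the object that was actually unlinked,
	 * which is not necessarily the needle used for the lookup.
	 */
	removed = hlist_entry(removed_node, struct entry, hash_entry);
	entry_put(removed);
}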
Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ef39aabdb6943..4fb01108e5f53 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_put(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_put(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_put(claim); } -- GitLab From 3d65b9accab4a7ed5038f6df403fbd5e298398c7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: [PATCH 0149/1861] batman-adv: Reduce tt_local hash refcnt only for removed entry The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_tt_local_remove is not itself protected in any way against another function modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such a situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free.
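Why the needle and the unlinked entry can differ is easiest to see on a timeline (a hypothetical interleaving, written as a comment):

/*
 *   CPU0                               CPU1
 *   ----                               ----
 *   hash_find(addr) -> entry A
 *                                      unlinks A, drops A's hash ref,
 *                                      inserts new entry B for the
 *                                      same addr/vid
 *   batadv_hash_remove(needle = A)
 *     compare matches B -> B unlinked
 *   old code: put(A)   -> A's refcount underflows, B's hash ref leaks
 *   fixed:    put(hlist_entry(node)) -> puts B, as required
 */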
Fixes: ef72706a0543 ("batman-adv: protect tt_local_entry from concurrent delete events") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f73d79139ae79..b5cfc5068a904 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1337,9 +1337,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1368,15 +1369,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_put(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_put(tt_removed_entry); out: if (tt_local_entry) -- GitLab From f131a56880d10932931e74773fb8702894a94a75 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 23 Feb 2019 14:27:10 +0100 Subject: [PATCH 0150/1861] batman-adv: Reduce tt_global hash refcnt only for removed entry The batadv_hash_remove is a function which searches the hashtable for an entry using a needle, a hashtable bucket selection function and a compare function. It will lock the bucket list and delete an entry when the compare function matches it with the needle. It returns the pointer to the hlist_node which matches or NULL when no entry matches the needle. The batadv_tt_global_free is not itself protected in any way against another function modifying the hashtable between the search for the entry and the call to batadv_hash_remove. It can therefore happen that the entry either doesn't exist anymore or an entry was deleted which is not the same object as the needle. In such a situation, the reference counter (for the reference stored in the hashtable) must not be reduced for the needle. Instead the reference counter of the actually removed entry has to be reduced. Otherwise the reference counter will underflow and the object might be freed before all its references were dropped. The kref helpers reported this problem as: refcount_t: underflow; use-after-free.
Fixes: 7683fdc1e886 ("batman-adv: protect the local and the global trans-tables with rcu") Reported-by: Martin Weinelt Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 18 +++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b5cfc5068a904..26c4e2493ddfb 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -616,14 +616,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_put(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_put(tt_removed_entry); } /** -- GitLab From ca8c3b922e7032aff6cc3fd05548f4df1f3df90e Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 22 Feb 2019 16:25:54 +0100 Subject: [PATCH 0151/1861] batman-adv: fix warning in function batadv_v_elp_get_throughput MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_CFG80211 isn't enabled, the compiler correctly warns that 'sinfo.pertid' may be used uninitialized. The same can happen for other error conditions without the compiler warning about it. net/batman-adv/bat_v_elp.c: In function ‘batadv_v_elp_get_throughput.isra.0’: include/net/cfg80211.h:6370:13: warning: ‘sinfo.pertid’ may be used uninitialized in this function [-Wmaybe-uninitialized] kfree(sinfo->pertid); ~~~~~^~~~~~~~ Rework so that we only release '&sinfo' if cfg80211_get_station returns zero. Fixes: 7d652669b61d ("batman-adv: release station info tidstats") Signed-off-by: Anders Roxell Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index a9b7919c9de55..d5df0114f08ac 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,8 +104,10 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); - /* free the TID stats immediately */ - cfg80211_sinfo_release_content(&sinfo); + if (!ret) { + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + } dev_put(real_netdev); if (ret == -ENOENT) { -- GitLab From 438b3d3fae4346a49fe12fa7cc1dc9327f006a91 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Mar 2019 19:25:26 +0100 Subject: [PATCH 0152/1861] batman-adv: Fix genl notification for throughput_override The throughput_override sysfs file is not below the meshif but below a hardif. The kobj therefore has no pointer that can be used to find the batadv_priv data. The pointer stored in the hardif object must be used instead to find the correct meshif private data.
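A compressed sketch of the corrected lookup (mirroring the diff below; batman-adv types assumed):

/* netdev_priv() returns the private area allocated together with the
 * net_device; for a hardif attribute the relevant private data lives
 * in the mesh (soft) interface the hardif is enslaved to, not in the
 * hardif's own kobj.
 */
static struct batadv_priv *
hardif_to_bat_priv(struct batadv_hard_iface *hard_iface)
{
	if (!hard_iface->soft_iface)
		return NULL;	/* hardif not attached to a mesh interface */

	return netdev_priv(hard_iface->soft_iface);
}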
Fixes: 7e6f461efe25 ("batman-adv: Trigger genl notification on sysfs config change") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0b4b3fb778a61..208655cf67179 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1116,9 +1116,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; u32 tp_override; u32 old_tp_override; bool ret; @@ -1147,7 +1147,10 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); - batadv_netlink_notify_hardif(bat_priv, hard_iface); + if (hard_iface->soft_iface) { + bat_priv = netdev_priv(hard_iface->soft_iface); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + } out: batadv_hardif_put(hard_iface); -- GitLab From 1efdd4bd254311498123a15fa0acd565f454da97 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0153/1861] m68k: Call timer_interrupt() with interrupts disabled Some platforms execute their timer handler with the interrupt priority level set below 6. That means the handler could be interrupted by another driver and this could lead to re-entry of the timer core. Avoid this by use of local_irq_save/restore for timer interrupt dispatch. This provides mutual exclusion around the timer interrupt flag access which is needed later in this series for the clocksource conversion. Reported-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1811131407120.2697@nanos.tec.linutronix.de Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/cia.c | 9 +++++++++ arch/m68k/atari/ataints.c | 4 ++-- arch/m68k/atari/time.c | 15 ++++++++++++++- arch/m68k/bvme6000/config.c | 20 ++++++++++---------- arch/m68k/hp300/time.c | 10 ++++++++-- arch/m68k/mac/via.c | 17 +++++++++++++++++ arch/m68k/mvme147/config.c | 18 ++++++++++-------- arch/m68k/mvme16x/config.c | 21 +++++++++++---------- arch/m68k/q40/q40ints.c | 19 +++++++++++-------- arch/m68k/sun3/sun3ints.c | 3 +++ arch/m68k/sun3x/time.c | 16 ++++++++++------ 11 files changed, 105 insertions(+), 47 deletions(-) diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c index 2081b8cd5591c..b9aee983e6f4c 100644 --- a/arch/m68k/amiga/cia.c +++ b/arch/m68k/amiga/cia.c @@ -88,10 +88,19 @@ static irqreturn_t cia_handler(int irq, void *dev_id) struct ciabase *base = dev_id; int mach_irq; unsigned char ints; + unsigned long flags; + /* Interrupts get disabled while the timer irq flag is cleared and + * the timer interrupt serviced. 
+ */ mach_irq = base->cia_irq; + local_irq_save(flags); ints = cia_set_irq(base, CIA_ICR_ALL); amiga_custom.intreq = base->int_mask; + if (ints & 1) + generic_handle_irq(mach_irq); + local_irq_restore(flags); + mach_irq++, ints >>= 1; for (; ints; mach_irq++, ints >>= 1) { if (ints & 1) generic_handle_irq(mach_irq); diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index 3d2b63bedf058..56f02ea2c248d 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -142,7 +142,7 @@ struct mfptimerbase { .name = "MFP Timer D" }; -static irqreturn_t mfptimer_handler(int irq, void *dev_id) +static irqreturn_t mfp_timer_d_handler(int irq, void *dev_id) { struct mfptimerbase *base = dev_id; int mach_irq; @@ -344,7 +344,7 @@ void __init atari_init_IRQ(void) st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 0xf0) | 0x6; /* request timer D dispatch handler */ - if (request_irq(IRQ_MFP_TIMD, mfptimer_handler, IRQF_SHARED, + if (request_irq(IRQ_MFP_TIMD, mfp_timer_d_handler, IRQF_SHARED, stmfp_base.name, &stmfp_base)) pr_err("Couldn't register %s interrupt\n", stmfp_base.name); diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c index 9cca64286464c..fafa20f75ab9a 100644 --- a/arch/m68k/atari/time.c +++ b/arch/m68k/atari/time.c @@ -24,6 +24,18 @@ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); +static irqreturn_t mfp_timer_c_handler(int irq, void *dev_id) +{ + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; +} + void __init atari_sched_init(irq_handler_t timer_routine) { @@ -32,7 +44,8 @@ atari_sched_init(irq_handler_t timer_routine) /* start timer C, div = 1:100 */ st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 15) | 0x60; /* install interrupt service routine for MFP Timer C */ - if (request_irq(IRQ_MFP_TIMC, timer_routine, 0, "timer", timer_routine)) + if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, 0, "timer", + timer_routine)) pr_err("Couldn't register timer interrupt\n"); } diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 143ee9fa3893e..0e5efed4da865 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -44,11 +44,6 @@ extern int bvme6000_hwclk (int, struct rtc_time *); extern void bvme6000_reset (void); void bvme6000_set_vectors (void); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/timer/timekeeping.c, called via bvme6000_process_int() */ - -static irq_handler_t tick_handler; - int __init bvme6000_parse_bootinfo(const struct bi_record *bi) { @@ -157,12 +152,18 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id) static irqreturn_t bvme6000_timer_int (int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; - unsigned char msr = rtc->msr & 0xc0; + unsigned char msr; + local_irq_save(flags); + msr = rtc->msr & 0xc0; rtc->msr = msr | 0x20; /* Ack the interrupt */ + timer_routine(0, NULL); + local_irq_restore(flags); - return tick_handler(irq, dev_id); + return IRQ_HANDLED; } /* @@ -181,9 +182,8 @@ void bvme6000_sched_init (irq_handler_t timer_routine) rtc->msr = 0; /* Ensure timer registers accessible */ - tick_handler = timer_routine; - if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, - "timer", bvme6000_timer_int)) + if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, "timer", + timer_routine)) panic ("Couldn't register timer int"); rtc->t1cr_omr = 0x04; /* Mode 2, ext clk */ diff 
--git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c index 289d928a46cbe..d30b03ea93a27 100644 --- a/arch/m68k/hp300/time.c +++ b/arch/m68k/hp300/time.c @@ -38,13 +38,19 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; unsigned long tmp; - irq_handler_t vector = dev_id; + + local_irq_save(flags); in_8(CLOCKBASE + CLKSR); asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE)); + timer_routine(0, NULL); + local_irq_restore(flags); + /* Turn off the network and SCSI leds */ blinken_leds(0, 0xe0); - return vector(irq, NULL); + return IRQ_HANDLED; } u32 hp300_gettimeoffset(void) diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 0b02894591738..5c3d650d6a6ff 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -440,6 +440,8 @@ void via_nubus_irq_shutdown(int irq) * via6522.c :-), disable/pending masks added. */ +#define VIA_TIMER_1_INT BIT(6) + void via1_irq(struct irq_desc *desc) { int irq_num; @@ -449,6 +451,21 @@ void via1_irq(struct irq_desc *desc) if (!events) return; + irq_num = IRQ_MAC_TIMER_1; + irq_bit = VIA_TIMER_1_INT; + if (events & irq_bit) { + unsigned long flags; + + local_irq_save(flags); + via1[vIFR] = irq_bit; + generic_handle_irq(irq_num); + local_irq_restore(flags); + + events &= ~irq_bit; + if (!events) + return; + } + irq_num = VIA1_SOURCE_BASE; irq_bit = 1; do { diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index adea549d240e9..93c68d2b8e0ea 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -45,11 +45,6 @@ extern void mvme147_reset (void); static int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/time/timekeeping.c, called via mvme147_process_int() */ - -irq_handler_t tick_handler; - int __init mvme147_parse_bootinfo(const struct bi_record *bi) { @@ -104,16 +99,23 @@ void __init config_mvme147(void) static irqreturn_t mvme147_timer_int (int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; - return tick_handler(irq, dev_id); + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; } void mvme147_sched_init (irq_handler_t timer_routine) { - tick_handler = timer_routine; - if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", NULL)) + if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", + timer_routine)) pr_err("Couldn't register timer interrupt\n"); /* Init the clock with a value */ diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 6ee36a5b528d8..5feb3ab484d08 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -50,11 +50,6 @@ extern void mvme16x_reset (void); int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/time/timekeeping.c, called via mvme16x_process_int() */ - -static irq_handler_t tick_handler; - unsigned short mvme16x_config; EXPORT_SYMBOL(mvme16x_config); @@ -352,8 +347,15 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) { - *(volatile unsigned char *)0xfff4201b |= 8; - return tick_handler(irq, dev_id); + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); + *(volatile unsigned char *)0xfff4201b |= 8; + timer_routine(0, NULL); + 
local_irq_restore(flags); + + return IRQ_HANDLED; } void mvme16x_sched_init (irq_handler_t timer_routine) @@ -361,14 +363,13 @@ void mvme16x_sched_init (irq_handler_t timer_routine) uint16_t brdno = be16_to_cpu(mvme_bdid.brdno); int irq; - tick_handler = timer_routine; /* Using PCCchip2 or MC2 chip tick timer 1 */ *(volatile unsigned long *)0xfff42008 = 0; *(volatile unsigned long *)0xfff42004 = 10000; /* 10ms */ *(volatile unsigned char *)0xfff42017 |= 3; *(volatile unsigned char *)0xfff4201b = 0x16; - if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, - "timer", mvme16x_timer_int)) + if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, "timer", + timer_routine)) panic ("Couldn't register timer int"); if (brdno == 0x0162 || brdno == 0x172) diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c index 3e7603202977e..1c696906c159f 100644 --- a/arch/m68k/q40/q40ints.c +++ b/arch/m68k/q40/q40ints.c @@ -127,10 +127,10 @@ void q40_mksound(unsigned int hz, unsigned int ticks) sound_ticks = ticks << 1; } -static irq_handler_t q40_timer_routine; - -static irqreturn_t q40_timer_int (int irq, void * dev) +static irqreturn_t q40_timer_int(int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + ql_ticks = ql_ticks ? 0 : 1; if (sound_ticks) { unsigned char sval=(sound_ticks & 1) ? 128-SVOL : 128+SVOL; @@ -139,8 +139,13 @@ static irqreturn_t q40_timer_int (int irq, void * dev) *DAC_RIGHT=sval; } - if (!ql_ticks) - q40_timer_routine(irq, dev); + if (!ql_ticks) { + unsigned long flags; + + local_irq_save(flags); + timer_routine(0, NULL); + local_irq_restore(flags); + } return IRQ_HANDLED; } @@ -148,11 +153,9 @@ void q40_sched_init (irq_handler_t timer_routine) { int timer_irq; - q40_timer_routine = timer_routine; timer_irq = Q40_IRQ_FRAME; - if (request_irq(timer_irq, q40_timer_int, 0, - "timer", q40_timer_int)) + if (request_irq(timer_irq, q40_timer_int, 0, "timer", timer_routine)) panic("Couldn't register timer int"); master_outb(-1, FRAME_CLEAR_REG); diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index 6bbca30c91884..a5824abb4a39c 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -61,8 +61,10 @@ static irqreturn_t sun3_int7(int irq, void *dev_id) static irqreturn_t sun3_int5(int irq, void *dev_id) { + unsigned long flags; unsigned int cnt; + local_irq_save(flags); #ifdef CONFIG_SUN3 intersil_clear(); #endif @@ -76,6 +78,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id) cnt = kstat_irqs_cpu(irq, 0); if (!(cnt % 20)) sun3_leds(led_pattern[cnt % 160 / 20]); + local_irq_restore(flags); return IRQ_HANDLED; } diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index 047e2bcee3d7a..3c8a86d085084 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -80,15 +80,19 @@ u32 sun3x_gettimeoffset(void) } #if 0 -static void sun3x_timer_tick(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sun3x_timer_tick(int irq, void *dev_id) { - void (*vector)(int, void *, struct pt_regs *) = dev_id; + irq_handler_t timer_routine = dev_id; + unsigned long flags; - /* Clear the pending interrupt - pulse the enable line low */ - disable_irq(5); - enable_irq(5); + local_irq_save(flags); + /* Clear the pending interrupt - pulse the enable line low */ + disable_irq(5); + enable_irq(5); + timer_routine(0, NULL); + local_irq_restore(flags); - vector(irq, NULL, regs); + return IRQ_HANDLED; } #endif -- GitLab From 0ca7ce7db771580433bf24454f7a1542bd326078 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 
11:53:10 +1100 Subject: [PATCH 0154/1861] m68k: mac: Fix VIA timer counter accesses This resolves some bugs that affect VIA timer counter accesses. Avoid lost interrupts caused by reading the counter low byte register. Make allowance for the fact that the counter will be decremented to 0xFFFF before being reloaded. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/mac/via.c | 102 +++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 5c3d650d6a6ff..8b88489987515 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -54,16 +54,6 @@ static __u8 rbv_clear; static int gIER,gIFR,gBufA,gBufB; -/* - * Timer defs. - */ - -#define TICK_SIZE 10000 -#define MAC_CLOCK_TICK (783300/HZ) /* ticks per HZ */ -#define MAC_CLOCK_LOW (MAC_CLOCK_TICK&0xFF) -#define MAC_CLOCK_HIGH (MAC_CLOCK_TICK>>8) - - /* * On Macs with a genuine VIA chip there is no way to mask an individual slot * interrupt. This limitation also seems to apply to VIA clone logic cores in @@ -271,22 +261,6 @@ void __init via_init(void) } } -/* - * Start the 100 Hz clock - */ - -void __init via_init_clock(irq_handler_t func) -{ - via1[vACR] |= 0x40; - via1[vT1LL] = MAC_CLOCK_LOW; - via1[vT1LH] = MAC_CLOCK_HIGH; - via1[vT1CL] = MAC_CLOCK_LOW; - via1[vT1CH] = MAC_CLOCK_HIGH; - - if (request_irq(IRQ_MAC_TIMER_1, func, 0, "timer", func)) - pr_err("Couldn't register %s interrupt\n", "timer"); -} - /* * Debugging dump, used in various places to see what's going on. */ @@ -314,29 +288,6 @@ void via_debug_dump(void) } } -/* - * This is always executed with interrupts disabled. - * - * TBI: get time offset between scheduling timer ticks - */ - -u32 mac_gettimeoffset(void) -{ - unsigned long ticks, offset = 0; - - /* read VIA1 timer 2 current value */ - ticks = via1[vT1CL] | (via1[vT1CH] << 8); - /* The probability of underflow is less than 2% */ - if (ticks > MAC_CLOCK_TICK - MAC_CLOCK_TICK / 50) - /* Check for pending timer interrupt in VIA1 IFR */ - if (via1[vIFR] & 0x40) offset = TICK_SIZE; - - ticks = MAC_CLOCK_TICK - ticks; - ticks = ticks * 10000L / MAC_CLOCK_TICK; - - return (ticks + offset) * 1000; -} - /* * Flush the L2 cache on Macs that have it by flipping * the system into 24-bit mode for an instant. @@ -622,3 +573,56 @@ int via2_scsi_drq_pending(void) return via2[gIFR] & (1 << IRQ_IDX(IRQ_MAC_SCSIDRQ)); } EXPORT_SYMBOL(via2_scsi_drq_pending); + +/* timer and clock source */ + +#define VIA_CLOCK_FREQ 783360 /* VIA "phase 2" clock in Hz */ +#define VIA_TIMER_INTERVAL (1000000 / HZ) /* microseconds per jiffy */ +#define VIA_TIMER_CYCLES (VIA_CLOCK_FREQ / HZ) /* clock cycles per jiffy */ + +#define VIA_TC (VIA_TIMER_CYCLES - 2) /* including 0 and -1 */ +#define VIA_TC_LOW (VIA_TC & 0xFF) +#define VIA_TC_HIGH (VIA_TC >> 8) + +void __init via_init_clock(irq_handler_t timer_routine) +{ + if (request_irq(IRQ_MAC_TIMER_1, timer_routine, 0, "timer", NULL)) { + pr_err("Couldn't register %s interrupt\n", "timer"); + return; + } + + via1[vT1LL] = VIA_TC_LOW; + via1[vT1LH] = VIA_TC_HIGH; + via1[vT1CL] = VIA_TC_LOW; + via1[vT1CH] = VIA_TC_HIGH; + via1[vACR] |= 0x40; +} + +u32 mac_gettimeoffset(void) +{ + unsigned long flags; + u8 count_high; + u16 count, offset = 0; + + /* + * Timer counter wrap-around is detected with the timer interrupt flag + * but reading the counter low byte (vT1CL) would reset the flag. + * Also, accessing both counter registers is essentially a data race. 
+ * These problems are avoided by ignoring the low byte. Clock accuracy + * is 256 times worse (error can reach 0.327 ms) but CPU overhead is + * reduced by avoiding slow VIA register accesses. + */ + + local_irq_save(flags); + count_high = via1[vT1CH]; + if (count_high == 0xFF) + count_high = 0; + if (count_high > 0 && (via1[vIFR] & VIA_TIMER_1_INT)) + offset = VIA_TIMER_CYCLES; + local_irq_restore(flags); + + count = count_high << 8; + count = VIA_TIMER_CYCLES - count + offset; + + return ((count * VIA_TIMER_INTERVAL) / VIA_TIMER_CYCLES) * 1000; +} -- GitLab From 6242c94d14304673e6a62f3ff77cf52404af875a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0155/1861] m68k: apollo, q40, sun3, sun3x: Remove arch_gettimeoffset implementations These dummy implementations are no better than default_arch_gettimeoffset() so remove them. Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/apollo/config.c | 7 ------- arch/m68k/q40/config.c | 9 --------- arch/m68k/sun3/config.c | 2 -- arch/m68k/sun3/intersil.c | 7 ------- arch/m68k/sun3x/config.c | 1 - arch/m68k/sun3x/time.c | 5 ----- arch/m68k/sun3x/time.h | 1 - 7 files changed, 32 deletions(-) diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index aef8d42e078dd..7d168e6dfb014 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -29,7 +29,6 @@ u_long apollo_model; extern void dn_sched_init(irq_handler_t handler); extern void dn_init_IRQ(void); -extern u32 dn_gettimeoffset(void); extern int dn_dummy_hwclk(int, struct rtc_time *); extern void dn_dummy_reset(void); #ifdef CONFIG_HEARTBEAT @@ -152,7 +151,6 @@ void __init config_apollo(void) mach_sched_init=dn_sched_init; /* */ mach_init_IRQ=dn_init_IRQ; - arch_gettimeoffset = dn_gettimeoffset; mach_max_dma_address = 0xffffffff; mach_hwclk = dn_dummy_hwclk; /* */ mach_reset = dn_dummy_reset; /* */ @@ -205,11 +203,6 @@ void dn_sched_init(irq_handler_t timer_routine) pr_err("Couldn't register timer interrupt\n"); } -u32 dn_gettimeoffset(void) -{ - return 0xdeadbeef; -} - int dn_dummy_hwclk(int op, struct rtc_time *t) { diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 96810d91da2bd..e63eb5f069995 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -40,7 +40,6 @@ extern void q40_init_IRQ(void); static void q40_get_model(char *model); extern void q40_sched_init(irq_handler_t handler); -static u32 q40_gettimeoffset(void); static int q40_hwclk(int, struct rtc_time *); static unsigned int q40_get_ss(void); static int q40_get_rtc_pll(struct rtc_pll_info *pll); @@ -169,7 +168,6 @@ void __init config_q40(void) mach_sched_init = q40_sched_init; mach_init_IRQ = q40_init_IRQ; - arch_gettimeoffset = q40_gettimeoffset; mach_hwclk = q40_hwclk; mach_get_ss = q40_get_ss; mach_get_rtc_pll = q40_get_rtc_pll; @@ -201,13 +199,6 @@ int __init q40_parse_bootinfo(const struct bi_record *rec) return 1; } - -static u32 q40_gettimeoffset(void) -{ - return 5000 * (ql_ticks != 0) * 1000; -} - - /* * Looks like op is non-zero for setting the clock, and zero for * reading the clock. 
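[Aside: the 0.327 ms accuracy bound quoted in the mac/via.c comment of the previous patch follows directly from the stated VIA clock rate. Ignoring the 8-bit low byte quantizes the count to 256 cycles, and 256 / 783360 Hz ~= 0.327 ms.]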
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index 542c4404861c3..229ea37dfe1b1 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -37,7 +37,6 @@ char sun3_reserved_pmeg[SUN3_PMEGS_NUM]; -extern u32 sun3_gettimeoffset(void); static void sun3_sched_init(irq_handler_t handler); extern void sun3_get_model (char* model); extern int sun3_hwclk(int set, struct rtc_time *t); @@ -138,7 +137,6 @@ void __init config_sun3(void) mach_sched_init = sun3_sched_init; mach_init_IRQ = sun3_init_IRQ; mach_reset = sun3_reboot; - arch_gettimeoffset = sun3_gettimeoffset; mach_get_model = sun3_get_model; mach_hwclk = sun3_hwclk; mach_halt = sun3_halt; diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c index d911070af02a5..8fc74864de819 100644 --- a/arch/m68k/sun3/intersil.c +++ b/arch/m68k/sun3/intersil.c @@ -22,13 +22,6 @@ #define STOP_VAL (INTERSIL_STOP | INTERSIL_INT_ENABLE | INTERSIL_24H_MODE) #define START_VAL (INTERSIL_RUN | INTERSIL_INT_ENABLE | INTERSIL_24H_MODE) -/* does this need to be implemented? */ -u32 sun3_gettimeoffset(void) -{ - return 1000; -} - - /* get/set hwclock */ int sun3_hwclk(int set, struct rtc_time *t) diff --git a/arch/m68k/sun3x/config.c b/arch/m68k/sun3x/config.c index 33d3a1c6fba04..03ce7f9facfe5 100644 --- a/arch/m68k/sun3x/config.c +++ b/arch/m68k/sun3x/config.c @@ -49,7 +49,6 @@ void __init config_sun3x(void) mach_sched_init = sun3x_sched_init; mach_init_IRQ = sun3_init_IRQ; - arch_gettimeoffset = sun3x_gettimeoffset; mach_reset = sun3x_reboot; mach_hwclk = sun3x_hwclk; diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index 3c8a86d085084..9163294b0fb62 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -73,11 +73,6 @@ int sun3x_hwclk(int set, struct rtc_time *t) return 0; } -/* Not much we can do here */ -u32 sun3x_gettimeoffset(void) -{ - return 0L; -} #if 0 static irqreturn_t sun3x_timer_tick(int irq, void *dev_id) diff --git a/arch/m68k/sun3x/time.h b/arch/m68k/sun3x/time.h index 496f406412adc..86ce78bb3c28d 100644 --- a/arch/m68k/sun3x/time.h +++ b/arch/m68k/sun3x/time.h @@ -3,7 +3,6 @@ #define SUN3X_TIME_H extern int sun3x_hwclk(int set, struct rtc_time *t); -u32 sun3x_gettimeoffset(void); void sun3x_sched_init(irq_handler_t vector); struct mostek_dt { -- GitLab From 7d6ca23554e34f25a70cb25666194f25b38c319b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0156/1861] m68k: Drop ARCH_USES_GETTIMEOFFSET The functions that implement arch_gettimeoffset are re-used by new clocksource drivers in subsequent patches. 
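[For readers unfamiliar with the clocksource API: the per-platform conversions in the following patches all share the shape sketched below. This is a condensed illustration only, not code from any one patch; "foo", foo_read_counter() and FOO_CLOCK_FREQ are placeholders.

static u32 clk_total;	/* cycles accumulated by completed timer ticks */

static u32 foo_read_counter(void);	/* placeholder: hardware counter read */

static u64 foo_read_clk(struct clocksource *cs)
{
	unsigned long flags;
	u32 ticks;

	local_irq_save(flags);
	/* elapsed = whole ticks already accounted + current counter value */
	ticks = clk_total + foo_read_counter();
	local_irq_restore(flags);

	return ticks;
}

static struct clocksource foo_clk = {
	.name	= "foo",
	.rating	= 250,
	.read	= foo_read_clk,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

	/* from the platform's sched_init(), once the timer is programmed: */
	clocksource_register_hz(&foo_clk, FOO_CLOCK_FREQ);

The timekeeping core then converts cycles to nanoseconds as roughly ns = (cycles * cs->mult) >> cs->shift, where mult and shift are chosen by clocksource_register_hz() so that ns ~= cycles * NSEC_PER_SEC / freq.]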
Signed-off-by: Finn Thain Acked-by: Linus Walleij Signed-off-by: Geert Uytterhoeven --- arch/m68k/Kconfig | 1 - arch/m68k/amiga/config.c | 3 --- arch/m68k/atari/config.c | 2 -- arch/m68k/bvme6000/config.c | 2 -- arch/m68k/hp300/config.c | 1 - arch/m68k/hp300/time.h | 1 - arch/m68k/mac/config.c | 3 --- arch/m68k/mvme147/config.c | 2 -- arch/m68k/mvme16x/config.c | 2 -- 9 files changed, 17 deletions(-) diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index b54206408f91b..6fb91412bcbb3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -20,7 +20,6 @@ config M68K select GENERIC_STRNCPY_FROM_USER if MMU select GENERIC_STRNLEN_USER if MMU select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE select HAVE_FUTEX_CMPXCHG if MMU && FUTEX select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 65f63a4571300..d4976c1aa0cca 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -95,8 +95,6 @@ static char amiga_model_name[13] = "Amiga "; static void amiga_sched_init(irq_handler_t handler); static void amiga_get_model(char *model); static void amiga_get_hardware_list(struct seq_file *m); -/* amiga specific timer functions */ -static u32 amiga_gettimeoffset(void); extern void amiga_mksound(unsigned int count, unsigned int ticks); static void amiga_reset(void); extern void amiga_init_sound(void); @@ -386,7 +384,6 @@ void __init config_amiga(void) mach_init_IRQ = amiga_init_IRQ; mach_get_model = amiga_get_model; mach_get_hardware_list = amiga_get_hardware_list; - arch_gettimeoffset = amiga_gettimeoffset; /* * default MAX_DMA=0xffffffff on all machines. If we don't do so, the SCSI diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 4fcc4b1df1c0f..902255e7b5b2a 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -78,7 +78,6 @@ static void atari_heartbeat(int on); /* atari specific timer functions (in time.c) */ extern void atari_sched_init(irq_handler_t); -extern u32 atari_gettimeoffset(void); extern int atari_mste_hwclk (int, struct rtc_time *); extern int atari_tt_hwclk (int, struct rtc_time *); @@ -205,7 +204,6 @@ void __init config_atari(void) mach_init_IRQ = atari_init_IRQ; mach_get_model = atari_get_model; mach_get_hardware_list = atari_get_hardware_list; - arch_gettimeoffset = atari_gettimeoffset; mach_reset = atari_reset; mach_max_dma_address = 0xffffff; #if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 0e5efed4da865..c8b99d5516d14 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -39,7 +39,6 @@ static void bvme6000_get_model(char *model); extern void bvme6000_sched_init(irq_handler_t handler); -extern u32 bvme6000_gettimeoffset(void); extern int bvme6000_hwclk (int, struct rtc_time *); extern void bvme6000_reset (void); void bvme6000_set_vectors (void); @@ -105,7 +104,6 @@ void __init config_bvme6000(void) mach_max_dma_address = 0xffffffff; mach_sched_init = bvme6000_sched_init; mach_init_IRQ = bvme6000_init_IRQ; - arch_gettimeoffset = bvme6000_gettimeoffset; mach_hwclk = bvme6000_hwclk; mach_reset = bvme6000_reset; mach_get_model = bvme6000_get_model; diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index a19bcd23f80b3..a161d44fd20bb 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -254,7 +254,6 @@ void __init config_hp300(void) mach_sched_init = hp300_sched_init; mach_init_IRQ = hp300_init_IRQ; 
mach_get_model = hp300_get_model; - arch_gettimeoffset = hp300_gettimeoffset; mach_hwclk = hp300_hwclk; mach_get_ss = hp300_get_ss; mach_reset = hp300_reset; diff --git a/arch/m68k/hp300/time.h b/arch/m68k/hp300/time.h index f5583ec4033d4..1d77b55cc72a0 100644 --- a/arch/m68k/hp300/time.h +++ b/arch/m68k/hp300/time.h @@ -1,2 +1 @@ extern void hp300_sched_init(irq_handler_t vector); -extern u32 hp300_gettimeoffset(void); diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index cd9317d532769..11be08f4f750a 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -54,8 +54,6 @@ struct mac_booter_data mac_bi_data; /* The phys. video addr. - might be bogus on some machines */ static unsigned long mac_orig_videoaddr; -/* Mac specific timer functions */ -extern u32 mac_gettimeoffset(void); extern int mac_hwclk(int, struct rtc_time *); extern void iop_preinit(void); extern void iop_init(void); @@ -155,7 +153,6 @@ void __init config_mac(void) mach_sched_init = mac_sched_init; mach_init_IRQ = mac_init_IRQ; mach_get_model = mac_get_model; - arch_gettimeoffset = mac_gettimeoffset; mach_hwclk = mac_hwclk; mach_reset = mac_reset; mach_halt = mac_poweroff; diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 93c68d2b8e0ea..4ef4faa5ed8b8 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -38,7 +38,6 @@ static void mvme147_get_model(char *model); extern void mvme147_sched_init(irq_handler_t handler); -extern u32 mvme147_gettimeoffset(void); extern int mvme147_hwclk (int, struct rtc_time *); extern void mvme147_reset (void); @@ -84,7 +83,6 @@ void __init config_mvme147(void) mach_max_dma_address = 0x01000000; mach_sched_init = mvme147_sched_init; mach_init_IRQ = mvme147_init_IRQ; - arch_gettimeoffset = mvme147_gettimeoffset; mach_hwclk = mvme147_hwclk; mach_reset = mvme147_reset; mach_get_model = mvme147_get_model; diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 5feb3ab484d08..3a3129e6f0bca 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -44,7 +44,6 @@ static MK48T08ptr_t volatile rtc = (MK48T08ptr_t)MVME_RTC_BASE; static void mvme16x_get_model(char *model); extern void mvme16x_sched_init(irq_handler_t handler); -extern u32 mvme16x_gettimeoffset(void); extern int mvme16x_hwclk (int, struct rtc_time *); extern void mvme16x_reset (void); @@ -272,7 +271,6 @@ void __init config_mvme16x(void) mach_max_dma_address = 0xffffffff; mach_sched_init = mvme16x_sched_init; mach_init_IRQ = mvme16x_init_IRQ; - arch_gettimeoffset = mvme16x_gettimeoffset; mach_hwclk = mvme16x_hwclk; mach_reset = mvme16x_reset; mach_get_model = mvme16x_get_model; -- GitLab From 5afd3d06e5cb30a312aaa34348f9457b9fb38a52 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0157/1861] m68k: amiga: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. 
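[The commit message is terse, so a note on the clk_offset bookkeeping in the diff below. The numbers are illustrative only; jiffy_ticks depends on the machine's E-clock.

/*
 * Illustrative only. Suppose jiffy_ticks = 7000. Near the end of a
 * period the counter reloads and the timer interrupt becomes pending,
 * but the handler (which does clk_total += jiffy_ticks) has not run:
 *
 *   naive read:            clk_total + 5        -> clock steps backwards
 *   with pending-IRQ test: clk_offset = 7000
 *                          clk_total + 7000 + 5 -> monotonic
 *
 * The handler later folds the period in (clk_total += jiffy_ticks)
 * and resets clk_offset to 0, so the compensation is counted once.
 */]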
Signed-off-by: Finn Thain Acked-by: Linus Walleij Tested-by: Michael Schmitz Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 46 ++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index d4976c1aa0cca..c32ab8041cf6b 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -461,7 +462,29 @@ void __init config_amiga(void) *(unsigned char *)ZTWO_VADDR(0xde0002) |= 0x80; } +static u64 amiga_read_clk(struct clocksource *cs); + +static struct clocksource amiga_clk = { + .name = "ciab", + .rating = 250, + .read = amiga_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + static unsigned short jiffy_ticks; +static u32 clk_total, clk_offset; + +static irqreturn_t ciab_timer_handler(int irq, void *dev_id) +{ + irq_handler_t timer_routine = dev_id; + + clk_total += jiffy_ticks; + clk_offset = 0; + timer_routine(0, NULL); + + return IRQ_HANDLED; +} static void __init amiga_sched_init(irq_handler_t timer_routine) { @@ -481,19 +504,22 @@ static void __init amiga_sched_init(irq_handler_t timer_routine) * Please don't change this to use ciaa, as it interferes with the * SCSI code. We'll have to take a look at this later */ - if (request_irq(IRQ_AMIGA_CIAB_TA, timer_routine, 0, "timer", NULL)) + if (request_irq(IRQ_AMIGA_CIAB_TA, ciab_timer_handler, IRQF_TIMER, + "timer", timer_routine)) pr_err("Couldn't register timer interrupt\n"); /* start timer */ ciab.cra |= 0x11; -} -#define TICK_SIZE 10000 + clocksource_register_hz(&amiga_clk, amiga_eclock); +} -/* This is always executed with interrupts disabled. */ -static u32 amiga_gettimeoffset(void) +static u64 amiga_read_clk(struct clocksource *cs) { unsigned short hi, lo, hi2; - u32 ticks, offset = 0; + unsigned long flags; + u32 ticks; + + local_irq_save(flags); /* read CIA B timer A current value */ hi = ciab.tahi; @@ -510,12 +536,14 @@ static u32 amiga_gettimeoffset(void) if (ticks > jiffy_ticks / 2) /* check for pending interrupt */ if (cia_set_irq(&ciab_base, 0) & CIA_ICR_TA) - offset = 10000; + clk_offset = jiffy_ticks; ticks = jiffy_ticks - ticks; - ticks = (10000 * ticks) / jiffy_ticks; + ticks += clk_offset + clk_total; + + local_irq_restore(flags); - return (ticks + offset) * 1000; + return ticks; } static void amiga_reset(void) __noreturn; -- GitLab From 26ccd2d376d9b7de9a27b9a7ed71e16216101921 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0158/1861] m68k: atari: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. Normally the MFP timer C interrupt flag would be used to check for timer counter wrap-around. Unfortunately, that flag gets cleared by the MFP itself (due to automatic End-of-Interrupt mode). This means that mfp_timer_c_handler() and atari_read_clk() must race when accounting for counter wrap-around. That problem is avoided by effectively stopping the clock when it might otherwise jump backwards (due to interrupt latency). Note that this may affect clock accuracy. After the timer interrupt is asserted, wait for the counter to be reloaded so that atari_read_clk() will not see the intermediate state as that would cause the clock to jump backwards. 
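[A condensed view of the two cooperating pieces in the diff below; these are isolated excerpts of that diff, not additional code.

	/* handler: per the commit message, spin until the counter has been
	 * reloaded, so a reader never samples the pre-reload state (the
	 * MFP's automatic EOI has already cleared the interrupt flag).
	 */
	do {
		last_timer_count = st_mfp.tim_dt_c;
	} while (last_timer_count == 1);
	clk_total += INT_TICKS;

	/* reader: clamp to the last value the handler saw; the clock may
	 * briefly stop advancing near wrap-around, but never runs backwards.
	 */
	count = min(st_mfp.tim_dt_c, last_timer_count);
	last_timer_count = count;
	ticks = clk_total + (INT_TICKS - count);]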
Signed-off-by: Finn Thain Acked-by: Linus Walleij Tested-by: Michael Schmitz Signed-off-by: Geert Uytterhoeven --- arch/m68k/atari/time.c | 53 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c index fafa20f75ab9a..ce923a523695a 100644 --- a/arch/m68k/atari/time.c +++ b/arch/m68k/atari/time.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -24,12 +25,29 @@ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); +static u64 atari_read_clk(struct clocksource *cs); + +static struct clocksource atari_clk = { + .name = "mfp", + .rating = 100, + .read = atari_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static u32 clk_total; +static u8 last_timer_count; + static irqreturn_t mfp_timer_c_handler(int irq, void *dev_id) { irq_handler_t timer_routine = dev_id; unsigned long flags; local_irq_save(flags); + do { + last_timer_count = st_mfp.tim_dt_c; + } while (last_timer_count == 1); + clk_total += INT_TICKS; timer_routine(0, NULL); local_irq_restore(flags); @@ -44,32 +62,33 @@ atari_sched_init(irq_handler_t timer_routine) /* start timer C, div = 1:100 */ st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 15) | 0x60; /* install interrupt service routine for MFP Timer C */ - if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, 0, "timer", + if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, IRQF_TIMER, "timer", timer_routine)) pr_err("Couldn't register timer interrupt\n"); + + clocksource_register_hz(&atari_clk, INT_CLK); } /* ++andreas: gettimeoffset fixed to check for pending interrupt */ -#define TICK_SIZE 10000 - -/* This is always executed with interrupts disabled. */ -u32 atari_gettimeoffset(void) +static u64 atari_read_clk(struct clocksource *cs) { - u32 ticks, offset = 0; - - /* read MFP timer C current value */ - ticks = st_mfp.tim_dt_c; - /* The probability of underflow is less than 2% */ - if (ticks > INT_TICKS - INT_TICKS / 50) - /* Check for pending timer interrupt */ - if (st_mfp.int_pn_b & (1 << 5)) - offset = TICK_SIZE; + unsigned long flags; + u8 count; + u32 ticks; - ticks = INT_TICKS - ticks; - ticks = ticks * 10000L / INT_TICKS; + local_irq_save(flags); + /* Ensure that the count is monotonically decreasing, even though + * the result may briefly stop changing after counter wrap-around. + */ + count = min(st_mfp.tim_dt_c, last_timer_count); + last_timer_count = count; + + ticks = INT_TICKS - count; + ticks += clk_total; + local_irq_restore(flags); - return (ticks + offset) * 1000; + return ticks; } -- GitLab From 3384df06c131833ef5fbc117cccfe6aac331bdda Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0159/1861] m68k: bvme6000: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. 
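[The magic numbers in the diff below line up as follows, assuming HZ = 100 (the usual m68k setting): RTC_TIMER_CYCLES = 8000000 / 100 = 80000 and RTC_TIMER_COUNT = 80000 / 2 - 1 = 39999, i.e. exactly the 39999/40000 values the old code hardcoded; the timer runs two half-cycles of 40000 counts per 10 ms tick.]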
Signed-off-by: Finn Thain Acked-by: Linus Walleij Signed-off-by: Geert Uytterhoeven --- arch/m68k/bvme6000/config.c | 57 ++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index c8b99d5516d14..8ebaabc931cd0 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,21 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id) return IRQ_HANDLED; } +static u64 bvme6000_read_clk(struct clocksource *cs); + +static struct clocksource bvme6000_clk = { + .name = "rtc", + .rating = 250, + .read = bvme6000_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static u32 clk_total, clk_offset; + +#define RTC_TIMER_CLOCK_FREQ 8000000 +#define RTC_TIMER_CYCLES (RTC_TIMER_CLOCK_FREQ / HZ) +#define RTC_TIMER_COUNT ((RTC_TIMER_CYCLES / 2) - 1) static irqreturn_t bvme6000_timer_int (int irq, void *dev_id) { @@ -158,6 +174,8 @@ static irqreturn_t bvme6000_timer_int (int irq, void *dev_id) local_irq_save(flags); msr = rtc->msr & 0xc0; rtc->msr = msr | 0x20; /* Ack the interrupt */ + clk_total += RTC_TIMER_CYCLES; + clk_offset = 0; timer_routine(0, NULL); local_irq_restore(flags); @@ -180,13 +198,13 @@ void bvme6000_sched_init (irq_handler_t timer_routine) rtc->msr = 0; /* Ensure timer registers accessible */ - if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, "timer", + if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, IRQF_TIMER, "timer", timer_routine)) panic ("Couldn't register timer int"); rtc->t1cr_omr = 0x04; /* Mode 2, ext clk */ - rtc->t1msb = 39999 >> 8; - rtc->t1lsb = 39999 & 0xff; + rtc->t1msb = RTC_TIMER_COUNT >> 8; + rtc->t1lsb = RTC_TIMER_COUNT & 0xff; rtc->irr_icr1 &= 0xef; /* Route timer 1 to INTR pin */ rtc->msr = 0x40; /* Access int.cntrl, etc */ rtc->pfr_icr0 = 0x80; /* Just timer 1 ints enabled */ @@ -198,14 +216,14 @@ void bvme6000_sched_init (irq_handler_t timer_routine) rtc->msr = msr; + clocksource_register_hz(&bvme6000_clk, RTC_TIMER_CLOCK_FREQ); + if (request_irq(BVME_IRQ_ABORT, bvme6000_abort_int, 0, "abort", bvme6000_abort_int)) panic ("Couldn't register abort int"); } -/* This is always executed with interrupts disabled. */ - /* * NOTE: Don't accept any readings within 5us of rollover, as * the T1INT bit may be a little slow getting set. There is also @@ -213,14 +231,18 @@ void bvme6000_sched_init (irq_handler_t timer_routine) * results... */ -u32 bvme6000_gettimeoffset(void) +static u64 bvme6000_read_clk(struct clocksource *cs) { + unsigned long flags; volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; volatile PitRegsPtr pit = (PitRegsPtr)BVME_PIT_BASE; - unsigned char msr = rtc->msr & 0xc0; + unsigned char msr, msb; unsigned char t1int, t1op; u32 v = 800000, ov; + local_irq_save(flags); + + msr = rtc->msr & 0xc0; rtc->msr = 0; /* Ensure timer registers accessible */ do { @@ -228,22 +250,25 @@ u32 bvme6000_gettimeoffset(void) t1int = rtc->msr & 0x20; t1op = pit->pcdr & 0x04; rtc->t1cr_omr |= 0x40; /* Latch timer1 */ - v = rtc->t1msb << 8; /* Read timer1 */ - v |= rtc->t1lsb; /* Read timer1 */ + msb = rtc->t1msb; /* Read timer1 */ + v = (msb << 8) | rtc->t1lsb; /* Read timer1 */ } while (t1int != (rtc->msr & 0x20) || t1op != (pit->pcdr & 0x04) || abs(ov-v) > 80 || - v > 39960); + v > RTC_TIMER_COUNT - (RTC_TIMER_COUNT / 100)); - v = 39999 - v; + v = RTC_TIMER_COUNT - v; if (!t1op) /* If in second half cycle.. 
*/ - v += 40000; - v /= 8; /* Convert ticks to microseconds */ - if (t1int) - v += 10000; /* Int pending, + 10ms */ + v += RTC_TIMER_CYCLES / 2; + if (msb > 0 && t1int) + clk_offset = RTC_TIMER_CYCLES; rtc->msr = msr; - return v * 1000; + v += clk_offset + clk_total; + + local_irq_restore(flags); + + return v; } /* -- GitLab From 2ed16626f58f88892864ed0cd68f841872ed1ca1 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0160/1861] m68k: hp300: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. Signed-off-by: Finn Thain Acked-by: Linus Walleij Signed-off-by: Geert Uytterhoeven --- arch/m68k/hp300/time.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c index d30b03ea93a27..90982803a6290 100644 --- a/arch/m68k/hp300/time.c +++ b/arch/m68k/hp300/time.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -19,6 +20,18 @@ #include #include +static u64 hp300_read_clk(struct clocksource *cs); + +static struct clocksource hp300_clk = { + .name = "timer", + .rating = 250, + .read = hp300_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static u32 clk_total; + /* Clock hardware definitions */ #define CLOCKBASE 0xf05f8000 @@ -32,9 +45,10 @@ #define CLKMSB3 0xD /* This is for machines which generate the exact clock. */ -#define USECS_PER_JIFFY (1000000/HZ) -#define INTVAL ((10000 / 4) - 1) +#define HP300_TIMER_CLOCK_FREQ 250000 +#define HP300_TIMER_CYCLES (HP300_TIMER_CLOCK_FREQ / HZ) +#define INTVAL (HP300_TIMER_CYCLES - 1) static irqreturn_t hp300_tick(int irq, void *dev_id) { @@ -45,6 +59,7 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) local_irq_save(flags); in_8(CLOCKBASE + CLKSR); asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE)); + clk_total += INTVAL; timer_routine(0, NULL); local_irq_restore(flags); @@ -53,20 +68,26 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) return IRQ_HANDLED; } -u32 hp300_gettimeoffset(void) +static u64 hp300_read_clk(struct clocksource *cs) { - /* Read current timer 1 value */ + unsigned long flags; unsigned char lsb, msb1, msb2; - unsigned short ticks; + u32 ticks; + local_irq_save(flags); + /* Read current timer 1 value */ msb1 = in_8(CLOCKBASE + 5); lsb = in_8(CLOCKBASE + 7); msb2 = in_8(CLOCKBASE + 5); if (msb1 != msb2) /* A carry happened while we were reading. 
Read it again */ lsb = in_8(CLOCKBASE + 7); + ticks = INTVAL - ((msb2 << 8) | lsb); - return ((USECS_PER_JIFFY * ticks) / INTVAL) * 1000; + ticks += clk_total; + local_irq_restore(flags); + + return ticks; } void __init hp300_sched_init(irq_handler_t vector) @@ -76,9 +97,11 @@ void __init hp300_sched_init(irq_handler_t vector) asm volatile(" movpw %0,%1@(5)" : : "d" (INTVAL), "a" (CLOCKBASE)); - if (request_irq(IRQ_AUTO_6, hp300_tick, 0, "timer tick", vector)) + if (request_irq(IRQ_AUTO_6, hp300_tick, IRQF_TIMER, "timer tick", vector)) pr_err("Couldn't register timer interrupt\n"); out_8(CLOCKBASE + CLKCR2, 0x1); /* select CR1 */ out_8(CLOCKBASE + CLKCR1, 0x40); /* enable irq */ + + clocksource_register_hz(&hp300_clk, HP300_TIMER_CLOCK_FREQ); } -- GitLab From 4be2ba93cf2e7a7639e13605aebe0cd656ef9d93 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0161/1861] m68k: hp300: Handle timer counter overflow Because hp300_read_clk() never checks the timer interrupt flag it may fail to notice that the timer has wrapped, allowing the clock to jump backwards. This is not a new problem. This is resolved by checking the interrupt flag and, if need be, taking wrap-around into account. The interrupt handler clears the flag when it eventually executes. Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/hp300/time.c | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c index 90982803a6290..bfee13e1d0fe1 100644 --- a/arch/m68k/hp300/time.c +++ b/arch/m68k/hp300/time.c @@ -30,7 +30,7 @@ static struct clocksource hp300_clk = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u32 clk_total; +static u32 clk_total, clk_offset; /* Clock hardware definitions */ @@ -41,9 +41,12 @@ static u32 clk_total; #define CLKCR3 CLKCR1 #define CLKSR CLKCR2 #define CLKMSB1 0x5 +#define CLKLSB1 0x7 #define CLKMSB2 0x9 #define CLKMSB3 0xD +#define CLKSR_INT1 BIT(0) + /* This is for machines which generate the exact clock. */ #define HP300_TIMER_CLOCK_FREQ 250000 @@ -60,6 +63,7 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) in_8(CLOCKBASE + CLKSR); asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE)); clk_total += INTVAL; + clk_offset = 0; timer_routine(0, NULL); local_irq_restore(flags); @@ -70,24 +74,28 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) static u64 hp300_read_clk(struct clocksource *cs) { - unsigned long flags; - unsigned char lsb, msb1, msb2; - u32 ticks; - - local_irq_save(flags); - /* Read current timer 1 value */ - msb1 = in_8(CLOCKBASE + 5); - lsb = in_8(CLOCKBASE + 7); - msb2 = in_8(CLOCKBASE + 5); - if (msb1 != msb2) - /* A carry happened while we were reading. 
Read it again */ - lsb = in_8(CLOCKBASE + 7); - - ticks = INTVAL - ((msb2 << 8) | lsb); - ticks += clk_total; - local_irq_restore(flags); - - return ticks; + unsigned long flags; + unsigned char lsb, msb, msb_new; + u32 ticks; + + local_irq_save(flags); + /* Read current timer 1 value */ + msb = in_8(CLOCKBASE + CLKMSB1); +again: + if ((in_8(CLOCKBASE + CLKSR) & CLKSR_INT1) && msb > 0) + clk_offset = INTVAL; + lsb = in_8(CLOCKBASE + CLKLSB1); + msb_new = in_8(CLOCKBASE + CLKMSB1); + if (msb_new != msb) { + msb = msb_new; + goto again; + } + + ticks = INTVAL - ((msb << 8) | lsb); + ticks += clk_offset + clk_total; + local_irq_restore(flags); + + return ticks; } void __init hp300_sched_init(irq_handler_t vector) -- GitLab From 481fa139b893ff3fdbab6ea1b98271f6dde11135 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0162/1861] m68k: mac: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. Signed-off-by: Finn Thain Acked-by: Linus Walleij Tested-by: Stan Johnson Signed-off-by: Geert Uytterhoeven --- arch/m68k/mac/via.c | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 8b88489987515..3c2cfcb749825 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -23,6 +23,7 @@ * */ +#include #include #include #include @@ -577,16 +578,39 @@ EXPORT_SYMBOL(via2_scsi_drq_pending); /* timer and clock source */ #define VIA_CLOCK_FREQ 783360 /* VIA "phase 2" clock in Hz */ -#define VIA_TIMER_INTERVAL (1000000 / HZ) /* microseconds per jiffy */ #define VIA_TIMER_CYCLES (VIA_CLOCK_FREQ / HZ) /* clock cycles per jiffy */ #define VIA_TC (VIA_TIMER_CYCLES - 2) /* including 0 and -1 */ #define VIA_TC_LOW (VIA_TC & 0xFF) #define VIA_TC_HIGH (VIA_TC >> 8) +static u64 mac_read_clk(struct clocksource *cs); + +static struct clocksource mac_clk = { + .name = "via1", + .rating = 250, + .read = mac_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static u32 clk_total, clk_offset; + +static irqreturn_t via_timer_handler(int irq, void *dev_id) +{ + irq_handler_t timer_routine = dev_id; + + clk_total += VIA_TIMER_CYCLES; + clk_offset = 0; + timer_routine(0, NULL); + + return IRQ_HANDLED; +} + void __init via_init_clock(irq_handler_t timer_routine) { - if (request_irq(IRQ_MAC_TIMER_1, timer_routine, 0, "timer", NULL)) { + if (request_irq(IRQ_MAC_TIMER_1, via_timer_handler, IRQF_TIMER, "timer", + timer_routine)) { pr_err("Couldn't register %s interrupt\n", "timer"); return; } @@ -596,13 +620,16 @@ void __init via_init_clock(irq_handler_t timer_routine) via1[vT1CL] = VIA_TC_LOW; via1[vT1CH] = VIA_TC_HIGH; via1[vACR] |= 0x40; + + clocksource_register_hz(&mac_clk, VIA_CLOCK_FREQ); } -u32 mac_gettimeoffset(void) +static u64 mac_read_clk(struct clocksource *cs) { unsigned long flags; u8 count_high; - u16 count, offset = 0; + u16 count; + u32 ticks; /* * Timer counter wrap-around is detected with the timer interrupt flag @@ -618,11 +645,11 @@ u32 mac_gettimeoffset(void) if (count_high == 0xFF) count_high = 0; if (count_high > 0 && (via1[vIFR] & VIA_TIMER_1_INT)) - offset = VIA_TIMER_CYCLES; - local_irq_restore(flags); - + clk_offset = VIA_TIMER_CYCLES; count = count_high << 8; - count = VIA_TIMER_CYCLES - count + offset; + ticks = VIA_TIMER_CYCLES - count; + ticks += clk_offset + clk_total; + local_irq_restore(flags); - return ((count * VIA_TIMER_INTERVAL) / VIA_TIMER_CYCLES) 
* 1000; + return ticks; } -- GitLab From fc4c47b3b5b8fe2b5d2940b9bd6203609b571b50 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0163/1861] m68k: mvme147: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. Signed-off-by: Finn Thain Acked-by: Linus Walleij Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/mvme147hw.h | 1 - arch/m68k/mvme147/config.c | 36 +++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h index 9c7ff67c5ffd6..7c3dd513128e3 100644 --- a/arch/m68k/include/asm/mvme147hw.h +++ b/arch/m68k/include/asm/mvme147hw.h @@ -66,7 +66,6 @@ struct pcc_regs { #define PCC_INT_ENAB 0x08 #define PCC_TIMER_INT_CLR 0x80 -#define PCC_TIMER_PRELOAD 63936l #define PCC_LEVEL_ABORT 0x07 #define PCC_LEVEL_SERIAL 0x04 diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 4ef4faa5ed8b8..c44a254e8a8c3 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,21 @@ void __init config_mvme147(void) vme_brdtype = VME_TYPE_MVME147; } +static u64 mvme147_read_clk(struct clocksource *cs); + +static struct clocksource mvme147_clk = { + .name = "pcc", + .rating = 250, + .read = mvme147_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static u32 clk_total; + +#define PCC_TIMER_CLOCK_FREQ 160000 +#define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ) +#define PCC_TIMER_PRELOAD (0x10000 - PCC_TIMER_CYCLES) /* Using pcc tick timer 1 */ @@ -103,6 +119,7 @@ static irqreturn_t mvme147_timer_int (int irq, void *dev_id) local_irq_save(flags); m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; + clk_total += PCC_TIMER_CYCLES; timer_routine(0, NULL); local_irq_restore(flags); @@ -112,32 +129,39 @@ static irqreturn_t mvme147_timer_int (int irq, void *dev_id) void mvme147_sched_init (irq_handler_t timer_routine) { - if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", - timer_routine)) + if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, IRQF_TIMER, + "timer 1", timer_routine)) pr_err("Couldn't register timer interrupt\n"); /* Init the clock with a value */ - /* our clock goes off every 6.25us */ + /* The clock counter increments until 0xFFFF then reloads */ m147_pcc->t1_preload = PCC_TIMER_PRELOAD; m147_pcc->t1_cntrl = 0x0; /* clear timer */ m147_pcc->t1_cntrl = 0x3; /* start timer */ m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; /* clear pending ints */ m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; + + clocksource_register_hz(&mvme147_clk, PCC_TIMER_CLOCK_FREQ); } -/* This is always executed with interrupts disabled. 
*/ /* XXX There are race hazards in this code XXX */ -u32 mvme147_gettimeoffset(void) +static u64 mvme147_read_clk(struct clocksource *cs) { + unsigned long flags; volatile unsigned short *cp = (volatile unsigned short *)0xfffe1012; unsigned short n; + u32 ticks; + local_irq_save(flags); n = *cp; while (n != *cp) n = *cp; n -= PCC_TIMER_PRELOAD; - return ((unsigned long)n * 25 / 4) * 1000; + ticks = clk_total + n; + local_irq_restore(flags); + + return ticks; } static int bcd2int (unsigned char b) -- GitLab From 7529b90d051e4629884771ba2b1d3a87d2c6a9d7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0164/1861] m68k: mvme147: Handle timer counter overflow Reading the timer counter races with timer overflow (and the corresponding interrupt). This is resolved by reading the overflow register and taking this value into account. The interrupt handler must clear the overflow register when it eventually executes. Suggested-by: Thomas Gleixner Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/mvme147hw.h | 1 + arch/m68k/mvme147/config.c | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h index 7c3dd513128e3..257b29184af91 100644 --- a/arch/m68k/include/asm/mvme147hw.h +++ b/arch/m68k/include/asm/mvme147hw.h @@ -66,6 +66,7 @@ struct pcc_regs { #define PCC_INT_ENAB 0x08 #define PCC_TIMER_INT_CLR 0x80 +#define PCC_TIMER_CLR_OVF 0x04 #define PCC_LEVEL_ABORT 0x07 #define PCC_LEVEL_SERIAL 0x04 diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index c44a254e8a8c3..545a1fe0e1194 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -118,7 +118,7 @@ static irqreturn_t mvme147_timer_int (int irq, void *dev_id) local_irq_save(flags); m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; - m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; + m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF; clk_total += PCC_TIMER_CYCLES; timer_routine(0, NULL); local_irq_restore(flags); @@ -144,21 +144,22 @@ void mvme147_sched_init (irq_handler_t timer_routine) clocksource_register_hz(&mvme147_clk, PCC_TIMER_CLOCK_FREQ); } -/* XXX There are race hazards in this code XXX */ static u64 mvme147_read_clk(struct clocksource *cs) { unsigned long flags; - volatile unsigned short *cp = (volatile unsigned short *)0xfffe1012; - unsigned short n; + u8 overflow, tmp; + u16 count; u32 ticks; local_irq_save(flags); - n = *cp; - while (n != *cp) - n = *cp; - - n -= PCC_TIMER_PRELOAD; - ticks = clk_total + n; + tmp = m147_pcc->t1_cntrl >> 4; + count = m147_pcc->t1_count; + overflow = m147_pcc->t1_cntrl >> 4; + if (overflow != tmp) + count = m147_pcc->t1_count; + count -= PCC_TIMER_PRELOAD; + ticks = count + overflow * PCC_TIMER_CYCLES; + ticks += clk_total; local_irq_restore(flags); return ticks; -- GitLab From 3d744eee38f11cafb0fc332c3081ab1cd07a89f7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0165/1861] m68k: mvme16x: Convert to clocksource API Add a platform clocksource by adapting the existing arch_gettimeoffset implementation. 
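[For the diff below, again assuming HZ = 100: the PCC tick timer runs at 1 MHz, so PCC_TIMER_CYCLES = 1000000 / 100 = 10000, matching the "10000 /* 10ms */" the old code wrote into the compare register, so that part of the substitution is mechanical.]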
Signed-off-by: Finn Thain Acked-by: Linus Walleij Signed-off-by: Geert Uytterhoeven --- arch/m68k/mvme16x/config.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 3a3129e6f0bca..2c109ee2a1a5c 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -343,6 +344,21 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) return IRQ_HANDLED; } +static u64 mvme16x_read_clk(struct clocksource *cs); + +static struct clocksource mvme16x_clk = { + .name = "pcc", + .rating = 250, + .read = mvme16x_read_clk, + .mask = CLOCKSOURCE_MASK(32), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static u32 clk_total; + +#define PCC_TIMER_CLOCK_FREQ 1000000 +#define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ) + static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) { irq_handler_t timer_routine = dev_id; @@ -350,6 +366,7 @@ static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) local_irq_save(flags); *(volatile unsigned char *)0xfff4201b |= 8; + clk_total += PCC_TIMER_CYCLES; timer_routine(0, NULL); local_irq_restore(flags); @@ -363,13 +380,15 @@ void mvme16x_sched_init (irq_handler_t timer_routine) /* Using PCCchip2 or MC2 chip tick timer 1 */ *(volatile unsigned long *)0xfff42008 = 0; - *(volatile unsigned long *)0xfff42004 = 10000; /* 10ms */ + *(volatile unsigned long *)0xfff42004 = PCC_TIMER_CYCLES; *(volatile unsigned char *)0xfff42017 |= 3; *(volatile unsigned char *)0xfff4201b = 0x16; - if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, "timer", + if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, IRQF_TIMER, "timer", timer_routine)) panic ("Couldn't register timer int"); + clocksource_register_hz(&mvme16x_clk, PCC_TIMER_CLOCK_FREQ); + if (brdno == 0x0162 || brdno == 0x172) irq = MVME162_IRQ_ABORT; else @@ -379,11 +398,17 @@ void mvme16x_sched_init (irq_handler_t timer_routine) panic ("Couldn't register abort int"); } - -/* This is always executed with interrupts disabled. */ -u32 mvme16x_gettimeoffset(void) +static u64 mvme16x_read_clk(struct clocksource *cs) { - return (*(volatile u32 *)0xfff42008) * 1000; + unsigned long flags; + u32 ticks; + + local_irq_save(flags); + ticks = *(volatile u32 *)0xfff42008; + ticks += clk_total; + local_irq_restore(flags); + + return ticks; } int bcd2int (unsigned char b) -- GitLab From 19999a8b8782d7f887353753c3c7cb5fca2f3784 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 1 Dec 2018 11:53:10 +1100 Subject: [PATCH 0166/1861] m68k: mvme16x: Handle timer counter overflow Reading the timer counter races with timer overflow (and the corresponding interrupt). This is resolved by reading the overflow register and taking this value into account. The interrupt handler must clear the overflow register when it eventually executes. 
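[This uses the same lock-free double-read idiom as the mvme147 overflow patch above: sample the overflow count on both sides of the counter read instead of stopping the timer. Distilled below; read_overflow() and read_counter() are placeholders for the register accesses in the actual diffs.

	tmp = read_overflow();		/* placeholder register reads */
	count = read_counter();
	overflow = read_overflow();
	if (overflow != tmp)		/* counter wrapped between the reads */
		count = read_counter();	/* re-read under the new overflow */
	ticks = count + overflow * TIMER_CYCLES;

Since the tick handler clears the overflow register after accounting a full period into clk_total, overflow is normally 0 or 1 here.]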
Suggested-by: Thomas Gleixner Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/mvme16x/config.c | 45 +++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 2c109ee2a1a5c..9bc2da69f80cb 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -115,11 +115,11 @@ static void __init mvme16x_init_IRQ (void) m68k_setup_user_interrupt(VEC_USER, 192); } -#define pcc2chip ((volatile u_char *)0xfff42000) -#define PccSCCMICR 0x1d -#define PccSCCTICR 0x1e -#define PccSCCRICR 0x1f -#define PccTPIACKR 0x25 +#define PCC2CHIP (0xfff42000) +#define PCCSCCMICR (PCC2CHIP + 0x1d) +#define PCCSCCTICR (PCC2CHIP + 0x1e) +#define PCCSCCRICR (PCC2CHIP + 0x1f) +#define PCCTPIACKR (PCC2CHIP + 0x25) #ifdef CONFIG_EARLY_PRINTK @@ -227,10 +227,10 @@ void mvme16x_cons_write(struct console *co, const char *str, unsigned count) base_addr[CyIER] = CyTxMpty; while (1) { - if (pcc2chip[PccSCCTICR] & 0x20) + if (in_8(PCCSCCTICR) & 0x20) { /* We have a Tx int. Acknowledge it */ - sink = pcc2chip[PccTPIACKR]; + sink = in_8(PCCTPIACKR); if ((base_addr[CyLICR] >> 2) == port) { if (i == count) { /* Last char of string is now output */ @@ -359,13 +359,26 @@ static u32 clk_total; #define PCC_TIMER_CLOCK_FREQ 1000000 #define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ) +#define PCCTCMP1 (PCC2CHIP + 0x04) +#define PCCTCNT1 (PCC2CHIP + 0x08) +#define PCCTOVR1 (PCC2CHIP + 0x17) +#define PCCTIC1 (PCC2CHIP + 0x1b) + +#define PCCTOVR1_TIC_EN 0x01 +#define PCCTOVR1_COC_EN 0x02 +#define PCCTOVR1_OVR_CLR 0x04 + +#define PCCTIC1_INT_CLR 0x08 +#define PCCTIC1_INT_EN 0x10 + static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) { irq_handler_t timer_routine = dev_id; unsigned long flags; local_irq_save(flags); - *(volatile unsigned char *)0xfff4201b |= 8; + out_8(PCCTIC1, in_8(PCCTIC1) | PCCTIC1_INT_CLR); + out_8(PCCTOVR1, PCCTOVR1_OVR_CLR); clk_total += PCC_TIMER_CYCLES; timer_routine(0, NULL); local_irq_restore(flags); @@ -379,10 +392,10 @@ void mvme16x_sched_init (irq_handler_t timer_routine) int irq; /* Using PCCchip2 or MC2 chip tick timer 1 */ - *(volatile unsigned long *)0xfff42008 = 0; - *(volatile unsigned long *)0xfff42004 = PCC_TIMER_CYCLES; - *(volatile unsigned char *)0xfff42017 |= 3; - *(volatile unsigned char *)0xfff4201b = 0x16; + out_be32(PCCTCNT1, 0); + out_be32(PCCTCMP1, PCC_TIMER_CYCLES); + out_8(PCCTOVR1, in_8(PCCTOVR1) | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN); + out_8(PCCTIC1, PCCTIC1_INT_EN | 6); if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, IRQF_TIMER, "timer", timer_routine)) panic ("Couldn't register timer int"); @@ -401,10 +414,16 @@ void mvme16x_sched_init (irq_handler_t timer_routine) static u64 mvme16x_read_clk(struct clocksource *cs) { unsigned long flags; + u8 overflow, tmp; u32 ticks; local_irq_save(flags); - ticks = *(volatile u32 *)0xfff42008; + tmp = in_8(PCCTOVR1) >> 4; + ticks = in_be32(PCCTCNT1); + overflow = in_8(PCCTOVR1) >> 4; + if (overflow != tmp) + ticks = in_be32(PCCTCNT1); + ticks += overflow * PCC_TIMER_CYCLES; ticks += clk_total; local_irq_restore(flags); -- GitLab From 733df11cfc3614254e2b9a5c81ac205e98a90b82 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Mar 2019 08:41:48 +0100 Subject: [PATCH 0167/1861] m68k: defconfig: Update defconfigs for v5.1-rc1 Actual changes: -CONFIG_FS_ENCRYPTION=m -CONFIG_NET_DEVLINK=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_MASQ_IPV6=m 
-CONFIG_NFT_REDIR_IPV4=m -CONFIG_NFT_REDIR_IPV6=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_XTS=m +CONFIG_TEST_STACKINIT=m +CONFIG_TEST_VMALLOC=m +# CONFIG_VALIDATE_FS_PARSER is not set +CONFIG_XDP_SOCKETS_DIAG=m Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 14 ++++++-------- arch/m68k/configs/apollo_defconfig | 14 ++++++-------- arch/m68k/configs/atari_defconfig | 14 ++++++-------- arch/m68k/configs/bvme6000_defconfig | 14 ++++++-------- arch/m68k/configs/hp300_defconfig | 14 ++++++-------- arch/m68k/configs/mac_defconfig | 14 ++++++-------- arch/m68k/configs/multi_defconfig | 14 ++++++-------- arch/m68k/configs/mvme147_defconfig | 14 ++++++-------- arch/m68k/configs/mvme16x_defconfig | 14 ++++++-------- arch/m68k/configs/q40_defconfig | 14 ++++++-------- arch/m68k/configs/sun3_defconfig | 14 ++++++-------- arch/m68k/configs/sun3x_defconfig | 14 ++++++-------- 12 files changed, 72 insertions(+), 96 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 525421ae277d6..fea392cfcf1be 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -56,6 +56,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -210,9 +211,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -234,9 +232,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -313,7 +308,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -460,12 +454,12 @@ CONFIG_RTC_DRV_RP5C01=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -573,9 +567,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -640,6 +636,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -649,4 +646,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index db0e654a88d53..2474d267460e9 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -52,6 +52,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -206,9 +207,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y 
CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -230,9 +228,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -309,7 +304,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -420,12 +414,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -533,9 +527,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -600,6 +596,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -609,4 +606,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 1451168eb789b..0fc7d2992fe03 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -59,6 +59,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -213,9 +214,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -237,9 +235,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -316,7 +311,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -442,12 +436,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -555,9 +549,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -622,6 +618,7 @@ CONFIG_TEST_OVERFLOW=m 
CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -631,4 +628,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index b0d3609f5bb3c..699df9fdf8668 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -49,6 +49,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -203,9 +204,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -227,9 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -306,7 +301,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -413,12 +407,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -526,9 +520,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -593,6 +589,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -602,4 +599,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 4ed7c151347cd..b508022553247 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -51,6 +51,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -205,9 +206,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -229,9 +227,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -308,7 +303,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set 
CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -422,12 +416,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -535,9 +529,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -602,6 +598,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -611,4 +608,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 0dc544e1ce1fb..04e7d70f6030e 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -50,6 +50,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -204,9 +205,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -228,9 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -310,7 +305,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -444,12 +438,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -557,9 +551,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -624,6 +620,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -633,4 +630,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 5a7b7b0d6e72e..5e1cc4c178522 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -70,6 +70,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m 
CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -224,9 +225,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -248,9 +246,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -330,7 +325,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -526,12 +520,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -639,9 +633,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -706,6 +702,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -715,4 +712,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 71eb9be1803b1..170ac8792c2df 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -48,6 +48,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -202,9 +203,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -226,9 +224,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -305,7 +300,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -412,12 +406,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -525,9 +519,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m 
+CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -592,6 +588,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -601,4 +598,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index ea2ebd4241c03..d865592a423e3 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -49,6 +49,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -203,9 +204,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -227,9 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -306,7 +301,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -413,12 +407,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -526,9 +520,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -593,6 +589,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -602,4 +599,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index cef6dc47c7250..034a9de904846 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -50,6 +50,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -204,9 +205,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -228,9 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -307,7 +302,6 @@ CONFIG_AF_KCM=m # 
CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -431,12 +425,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -544,9 +538,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -611,6 +607,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -620,4 +617,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 69f2282dc4e9c..49be0f9fcd8dd 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -46,6 +46,7 @@ CONFIG_TLS=m CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -200,9 +201,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -224,9 +222,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -303,7 +298,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -415,12 +409,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -528,9 +522,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -595,6 +591,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -604,3 +601,4 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index e91267e868b2d..a71acf4a6004c 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -46,6 +46,7 @@ CONFIG_TLS=m 
CONFIG_XFRM_MIGRATE=y CONFIG_NET_KEY=y CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y @@ -200,9 +201,6 @@ CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_MASQ_IPV4=m -CONFIG_NFT_REDIR_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -224,9 +222,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_MASQ_IPV6=m -CONFIG_NFT_REDIR_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -303,7 +298,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -414,12 +408,12 @@ CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m +# CONFIG_VALIDATE_FS_PARSER is not set CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_JFS_FS=m CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set -CONFIG_FS_ENCRYPTION=m CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -527,9 +521,11 @@ CONFIG_CRYPTO_AEGIS256=m CONFIG_CRYPTO_MORUS640=m CONFIG_CRYPTO_MORUS1280=m CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m @@ -594,6 +590,7 @@ CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_IDA=m +CONFIG_TEST_VMALLOC=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_FIND_BIT_BENCHMARK=m @@ -603,4 +600,5 @@ CONFIG_TEST_UDELAY=m CONFIG_TEST_STATIC_KEYS=m CONFIG_TEST_KMOD=m CONFIG_TEST_MEMCAT_P=m +CONFIG_TEST_STACKINIT=m CONFIG_EARLY_PRINTK=y -- GitLab From 8f71370f4b02730e8c27faf460af7a3586e24e1f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 22 Mar 2019 15:39:48 -0700 Subject: [PATCH 0168/1861] ASoC: intel: Fix crash at suspend/resume after failed codec registration If codec registration fails after the ASoC Intel SST driver has been probed, the kernel will Oops and crash at suspend/resume. general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 PID: 2811 Comm: cat Tainted: G W 4.19.30 #15 Hardware name: GOOGLE Clapper, BIOS Google_Clapper.5216.199.7 08/22/2014 RIP: 0010:snd_soc_suspend+0x5a/0xd21 Code: 03 80 3c 10 00 49 89 d7 74 0b 48 89 df e8 71 72 c4 fe 4c 89 fa 48 8b 03 48 89 45 d0 48 8d 98 a0 01 00 00 48 89 d8 48 c1 e8 03 <8a> 04 10 84 c0 0f 85 85 0c 00 00 80 3b 00 0f 84 6b 0c 00 00 48 8b RSP: 0018:ffff888035407750 EFLAGS: 00010202 RAX: 0000000000000034 RBX: 00000000000001a0 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff88805c417098 RBP: ffff8880354077b0 R08: dffffc0000000000 R09: ffffed100b975718 R10: 0000000000000001 R11: ffffffff949ea4a3 R12: 1ffff1100b975746 R13: dffffc0000000000 R14: ffff88805cba4588 R15: dffffc0000000000 FS: 0000794a78e91b80(0000) GS:ffff888068d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007bd5283ccf58 CR3: 000000004b7aa000 CR4: 00000000001006e0 Call Trace: ? dpm_complete+0x67b/0x67b ? i915_gem_suspend+0x14d/0x1ad sst_soc_prepare+0x91/0x1dd ? sst_be_hw_params+0x7e/0x7e dpm_prepare+0x39a/0x88b dpm_suspend_start+0x13/0x9d suspend_devices_and_enter+0x18f/0xbd7 ? arch_suspend_enable_irqs+0x11/0x11 ? printk+0xd9/0x12d ? 
lock_release+0x95f/0x95f ? log_buf_vmcoreinfo_setup+0x131/0x131 ? rcu_read_lock_sched_held+0x140/0x22a ? __bpf_trace_rcu_utilization+0xa/0xa ? __pm_pr_dbg+0x186/0x190 ? pm_notifier_call_chain+0x39/0x39 ? suspend_test+0x9d/0x9d pm_suspend+0x2f4/0x728 ? trace_suspend_resume+0x3da/0x3da ? lock_release+0x95f/0x95f ? kernfs_fop_write+0x19f/0x32d state_store+0xd8/0x147 ? sysfs_kf_read+0x155/0x155 kernfs_fop_write+0x23e/0x32d __vfs_write+0x108/0x608 ? vfs_read+0x2e9/0x2e9 ? rcu_read_lock_sched_held+0x140/0x22a ? __bpf_trace_rcu_utilization+0xa/0xa ? debug_smp_processor_id+0x10/0x10 ? selinux_file_permission+0x1c5/0x3c8 ? rcu_sync_lockdep_assert+0x6a/0xad ? __sb_start_write+0x129/0x2ac vfs_write+0x1aa/0x434 ksys_write+0xfe/0x1be ? __ia32_sys_read+0x82/0x82 do_syscall_64+0xcd/0x120 entry_SYSCALL_64_after_hwframe+0x49/0xbe In the observed situation, the problem is seen because the codec driver failed to probe due to a hardware problem. max98090 i2c-193C9890:00: Failed to read device revision: -1 max98090 i2c-193C9890:00: ASoC: failed to probe component -1 cht-bsw-max98090 cht-bsw-max98090: ASoC: failed to instantiate card -1 cht-bsw-max98090 cht-bsw-max98090: snd_soc_register_card failed -1 cht-bsw-max98090: probe of cht-bsw-max98090 failed with error -1 The problem is similar to the problem solved with commit 2fc995a87f2e ("ASoC: intel: Fix crash at suspend/resume without card registration"), but codec registration fails at a later point. At that time, the pointer checked with the above mentioned commit is already set, but it is not cleared if the device is subsequently removed. Adding a remove function to clear the pointer fixes the problem. Cc: stable@vger.kernel.org Cc: Jarkko Nikula Cc: Curtis Malainey Signed-off-by: Guenter Roeck Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 08cea5b5cda9f..0e8b1c5eec888 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -706,9 +706,17 @@ static int sst_soc_probe(struct snd_soc_component *component) return sst_dsp_init_v2_dpcm(component); } +static void sst_soc_remove(struct snd_soc_component *component) +{ + struct sst_data *drv = dev_get_drvdata(component->dev); + + drv->soc_card = NULL; +} + static const struct snd_soc_component_driver sst_soc_platform_drv = { .name = DRV_NAME, .probe = sst_soc_probe, + .remove = sst_soc_remove, .ops = &sst_platform_ops, .compr_ops = &sst_platform_compr_ops, .pcm_new = sst_pcm_new, -- GitLab From 836f90f9e2d11263f9c6d7610c82f3bc7335d9a6 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 15 Mar 2019 10:54:14 +0100 Subject: [PATCH 0169/1861] drm/bridge: dw-hdmi: disable SCDC configuration for invalid setups This patch is an attempt to limit HDMI 2.0 SCDC setup when: - the SoC embeds an HDMI 1.4 only controller - the EDID supports SCDC but not scrambling - the EDID supports SCDC scrambling but not for low TMDS bit rates, while only supporting low TMDS bit rates This avoids communicating with the SCDC DDC slave unnecessarily, and avoids setting up the DW-HDMI TMDS Scrambler when the underlying hardware does not support it. 
Reported-by: Rob Herring Fixes: 264fce6cc2c1 ("drm/bridge: dw-hdmi: Add SCDC and TMDS Scrambling support") Signed-off-by: Neil Armstrong Tested-by: Rob Herring Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20190315095414.28520-1-narmstrong@baylibre.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 34 ++++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index a63e5f0dae56a..db761329a1e3e 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1037,6 +1037,31 @@ void dw_hdmi_phy_i2c_write(struct dw_hdmi *hdmi, unsigned short data, } EXPORT_SYMBOL_GPL(dw_hdmi_phy_i2c_write); +/* Filter out invalid setups to avoid configuring SCDC and scrambling */ +static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi) +{ + struct drm_display_info *display = &hdmi->connector.display_info; + + /* Completely disable SCDC support for older controllers */ + if (hdmi->version < 0x200a) + return false; + + /* Disable if SCDC is not supported, or if an HF-VSDB block is absent */ + if (!display->hdmi.scdc.supported || + !display->hdmi.scdc.scrambling.supported) + return false; + + /* + * Disable if display only support low TMDS rates and scrambling + * for low rates is not supported either + */ + if (!display->hdmi.scdc.scrambling.low_rates && + display->max_tmds_clock <= 340000) + return false; + + return true; +} + /* * HDMI2.0 Specifies the following procedure for High TMDS Bit Rates: * - The Source shall suspend transmission of the TMDS clock and data @@ -1055,7 +1080,7 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi) unsigned long mtmdsclock = hdmi->hdmi_data.video_mode.mtmdsclock; /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */ - if (hdmi->connector.display_info.hdmi.scdc.supported) { + if (dw_hdmi_support_scdc(hdmi)) { if (mtmdsclock > HDMI14_MAX_TMDSCLK) drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1); else @@ -1579,8 +1604,9 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, /* Set up HDMI_FC_INVIDCONF */ inv_val = (hdmi->hdmi_data.hdcp_enable || - vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || - hdmi_info->scdc.scrambling.low_rates ? + (dw_hdmi_support_scdc(hdmi) && + (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || + hdmi_info->scdc.scrambling.low_rates)) ? HDMI_FC_INVIDCONF_HDCP_KEEPOUT_ACTIVE : HDMI_FC_INVIDCONF_HDCP_KEEPOUT_INACTIVE); @@ -1646,7 +1672,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, } /* Scrambling Control */ - if (hdmi_info->scdc.supported) { + if (dw_hdmi_support_scdc(hdmi)) { if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK || hdmi_info->scdc.scrambling.low_rates) { /* -- GitLab From d6f987c8462ab97591fbd6ed6bea6df61d2919e5 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 24 Mar 2019 17:43:27 +0100 Subject: [PATCH 0170/1861] clk: meson: pll: fix rounding and setting a rate that matches precisely Make meson_clk_pll_is_better() consider a rate that precisely matches the requested rate to be better than any previous rate (which was smaller than the current). Prior to commit 8eed1db1adec6a ("clk: meson: pll: update driver for the g12a") meson_clk_get_pll_settings() returned early (before calling meson_clk_pll_is_better()) if the rate from the current iteration matches the requested rate precisely. After this commit meson_clk_pll_is_better() is called unconditionally. This requires meson_clk_pll_is_better() to work with the case where "now == rate". 
This fixes a hang during boot on Meson8b / Odroid-C1 for me. Fixes: 8eed1db1adec6a ("clk: meson: pll: update driver for the g12a") Signed-off-by: Martin Blumenstingl Reviewed-by: Jerome Brunet Signed-off-by: Neil Armstrong Link: https://lkml.kernel.org/r/20190324164327.22590-2-martin.blumenstingl@googlemail.com --- drivers/clk/meson/clk-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c index 41e16dd7272a5..7a14ac9b2fecf 100644 --- a/drivers/clk/meson/clk-pll.c +++ b/drivers/clk/meson/clk-pll.c @@ -120,7 +120,7 @@ static bool meson_clk_pll_is_better(unsigned long rate, return true; } else { /* Round down */ - if (now < rate && best < now) + if (now <= rate && best < now) return true; } -- GitLab From cacea3a90e211f0c111975535508d446a4a928d2 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Fri, 22 Mar 2019 18:00:09 +0530 Subject: [PATCH 0171/1861] ASoC: dapm: Fix NULL pointer dereference in snd_soc_dapm_free_kcontrol w_param_text can be NULL and it is being dereferenced without checking. Add the missing sanity check to prevent NULL pointer dereference. Signed-off-by: Pankaj Bharadiya Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 1ec06ef6d1616..67b032ca1601f 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3957,6 +3957,10 @@ snd_soc_dapm_free_kcontrol(struct snd_soc_card *card, int count; devm_kfree(card->dev, (void *)*private_value); + + if (!w_param_text) + return; + for (count = 0 ; count < num_params; count++) devm_kfree(card->dev, (void *)w_param_text[count]); devm_kfree(card->dev, w_param_text); -- GitLab From d040e4e8deeaa8257d6aa260e29ad69832b5d630 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:00 -0700 Subject: [PATCH 0172/1861] ARM: dts: rockchip: Fix gpu opp node names for rk3288 The device tree compiler yells like this: Warning (unit_address_vs_reg): /gpu-opp-table/opp@100000000: node has a unit name, but no reg property Let's match the cpu opp node names and use a dash. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 09868dcee34b9..df0c5456c94fd 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1282,27 +1282,27 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <950000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <950000>; }; - opp@300000000 { + opp-300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <1000000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <1100000>; }; - opp@500000000 { + opp-500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <1200000>; }; - opp@600000000 { + opp-600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <1250000>; }; -- GitLab From 282e2e078ba5338c72150477b743794bc7523917 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:01 -0700 Subject: [PATCH 0173/1861] ARM: dts: rockchip: Remove #address/#size-cells from rk3288 mipi_dsi They are pointless. 
As dtc points out: Warning (avoid_unnecessary_addr_size): /mipi@ff960000: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Let's remove them. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index df0c5456c94fd..a024d1e7e74cd 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -1119,8 +1119,6 @@ clock-names = "ref", "pclk"; power-domains = <&power RK3288_PD_VIO>; rockchip,grf = <&grf>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { -- GitLab From 1a96665143c355b1019ed13b927266185d2a1e4f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 20 Mar 2019 13:14:02 -0700 Subject: [PATCH 0174/1861] ARM: dts: rockchip: Remove #address/#size-cells from rk3288-veyron gpio-keys They are pointless. As dtc points out: Warning (avoid_unnecessary_addr_size): /gpio-keys: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Let's remove them. Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-veyron.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 0bc2409f6903f..192dbc089ade1 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -25,8 +25,6 @@ gpio_keys: gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; pinctrl-names = "default"; pinctrl-0 = <&pwr_key_l>; -- GitLab From a6256b3a92cbaf3f5ff034ce09d5665607e2d7a4 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 4 Mar 2019 11:49:16 +0100 Subject: [PATCH 0175/1861] dt-bindings: reset: meson-g12a: Add missing USB2 PHY resets The G12A documentation lacked these 2 reset lines, but they are present and used by each of the USB 2 PHYs. Add them to the dt-bindings for the upcoming USB support. Fixes: dbfc54534dfc ("dt-bindings: reset: meson: add g12a bindings") Signed-off-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/amlogic,meson-g12a-reset.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h index 8063e8314eefb..6d487c5eba2ca 100644 --- a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h +++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h @@ -51,7 +51,10 @@ #define RESET_SD_EMMC_A 44 #define RESET_SD_EMMC_B 45 #define RESET_SD_EMMC_C 46 -/* 47-60 */ +/* 47 */ +#define RESET_USB_PHY20 48 +#define RESET_USB_PHY21 49 +/* 50-60 */ #define RESET_AUDIO_CODEC 61 /* 62-63 */ /* RESET2 */ -- GitLab From 13e8a05b922457761ddef39cfff6231bd4ed9eef Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Mar 2019 22:03:52 +0800 Subject: [PATCH 0176/1861] reset: meson-audio-arb: Fix missing .owner setting of reset_controller_dev Set .owner to prevent module unloading while being used. 
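For context, a minimal sketch of the pattern this fix enforces (assuming the standard reset-controller API; the driver name and probe boilerplate here are invented for illustration): the reset core takes a module reference on the rcdev's owner when a consumer obtains a reset control, so a controller registered without .owner can have its module unloaded while a reset line is still held.

	#include <linux/module.h>
	#include <linux/platform_device.h>
	#include <linux/reset-controller.h>

	static const struct reset_control_ops demo_rstc_ops = {
		/* .assert / .deassert callbacks would go here */
	};

	static int demo_probe(struct platform_device *pdev)
	{
		struct reset_controller_dev *rcdev;

		rcdev = devm_kzalloc(&pdev->dev, sizeof(*rcdev), GFP_KERNEL);
		if (!rcdev)
			return -ENOMEM;

		rcdev->ops = &demo_rstc_ops;
		rcdev->of_node = pdev->dev.of_node;
		rcdev->nr_resets = 1;
		rcdev->owner = THIS_MODULE;	/* the field this patch adds */

		return devm_reset_controller_register(&pdev->dev, rcdev);
	}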
Signed-off-by: Axel Lin Fixes: d903779b58be ("reset: meson: add meson audio arb driver") Signed-off-by: Philipp Zabel --- drivers/reset/reset-meson-audio-arb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/reset-meson-audio-arb.c b/drivers/reset/reset-meson-audio-arb.c index 91751617b37af..c53a2185a0393 100644 --- a/drivers/reset/reset-meson-audio-arb.c +++ b/drivers/reset/reset-meson-audio-arb.c @@ -130,6 +130,7 @@ static int meson_audio_arb_probe(struct platform_device *pdev) arb->rstc.nr_resets = ARRAY_SIZE(axg_audio_arb_reset_bits); arb->rstc.ops = &meson_audio_arb_rstc_ops; arb->rstc.of_node = dev->of_node; + arb->rstc.owner = THIS_MODULE; /* * Enable general : -- GitLab From 1da6c4d9140cb7c13e87667dc4e1488d6c8fc10f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Mar 2019 15:54:43 +0100 Subject: [PATCH 0177/1861] bpf: fix use after free in bpf_evict_inode syzkaller was able to generate the following UAF in bpf: BUG: KASAN: use-after-free in lookup_last fs/namei.c:2269 [inline] BUG: KASAN: use-after-free in path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 Read of size 1 at addr ffff8801c4865c47 by task syz-executor2/9423 CPU: 0 PID: 9423 Comm: syz-executor2 Not tainted 4.20.0-rc1-next-20181109+ #110 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x39d lib/dump_stack.c:113 print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 lookup_last fs/namei.c:2269 [inline] path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 filename_lookup+0x26a/0x520 fs/namei.c:2348 user_path_at_empty+0x40/0x50 fs/namei.c:2608 user_path include/linux/namei.h:62 [inline] do_mount+0x180/0x1ff0 fs/namespace.c:2980 ksys_mount+0x12d/0x140 fs/namespace.c:3258 __do_sys_mount fs/namespace.c:3272 [inline] __se_sys_mount fs/namespace.c:3269 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3269 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457569 Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fde6ed96c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000457569 RDX: 0000000020000040 RSI: 0000000020000000 RDI: 0000000000000000 RBP: 000000000072bf00 R08: 0000000020000340 R09: 0000000000000000 R10: 0000000000200000 R11: 0000000000000246 R12: 00007fde6ed976d4 R13: 00000000004c2c24 R14: 00000000004d4990 R15: 00000000ffffffff Allocated by task 9424: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3722 [inline] __kmalloc_track_caller+0x157/0x760 mm/slab.c:3737 kstrdup+0x39/0x70 mm/util.c:49 bpf_symlink+0x26/0x140 kernel/bpf/inode.c:356 vfs_symlink+0x37a/0x5d0 fs/namei.c:4127 do_symlinkat+0x242/0x2d0 fs/namei.c:4154 __do_sys_symlink fs/namei.c:4173 [inline] __se_sys_symlink fs/namei.c:4171 [inline] __x64_sys_symlink+0x59/0x80 fs/namei.c:4171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9425: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] 
__kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3817 bpf_evict_inode+0x11f/0x150 kernel/bpf/inode.c:565 evict+0x4b9/0x980 fs/inode.c:558 iput_final fs/inode.c:1550 [inline] iput+0x674/0xa90 fs/inode.c:1576 do_unlinkat+0x733/0xa30 fs/namei.c:4069 __do_sys_unlink fs/namei.c:4110 [inline] __se_sys_unlink fs/namei.c:4108 [inline] __x64_sys_unlink+0x42/0x50 fs/namei.c:4108 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe In this scenario, path lookup under RCU races with the final unlink in the case of symlinks. As Linus puts it in his analysis: [...] We actually RCU-delay the inode freeing itself, but when we do the final iput(), the "evict()" function is called synchronously. Now, the simple fix would seem to just RCU-delay the kfree() of the symlink data in bpf_evict_inode(). Maybe that's the right thing to do. [...] Al suggested piggy-backing on the ->destroy_inode() callback in order to implement RCU deferral there, which can then kfree() the inode->i_link eventually, right before putting the inode back into the inode cache. By reusing free_inode_nonrcu() from there, we can avoid the need for our own inode cache and just reuse the generic one as we currently do. And in fact, on top of all this, we should just get rid of bpf_evict_inode() entirely. This means truncate_inode_pages_final() and clear_inode() will then simply be called by the fs core via evict(). Dropping the reference should really only be done when the inode is unhashed and nothing is reachable anymore, so it's better also moved into the final ->destroy_inode() callback. Fixes: 0f98621bef5d ("bpf, inode: add support for symlinks and fix mtime/ctime") Reported-by: syzbot+fb731ca573367b7f6564@syzkaller.appspotmail.com Reported-by: syzbot+a13e5ead792d6df37818@syzkaller.appspotmail.com Reported-by: syzbot+7a8ba368b47fdefca61e@syzkaller.appspotmail.com Suggested-by: Al Viro Analyzed-by: Linus Torvalds Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Linus Torvalds Acked-by: Al Viro Link: https://lore.kernel.org/lkml/0000000000006946d2057bbd0eef@google.com/T/ --- kernel/bpf/inode.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 2ada5e21dfa62..4a8f390a2b821 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -static void bpf_evict_inode(struct inode *inode) -{ - enum bpf_type type; - - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - - if (S_ISLNK(inode->i_mode)) - kfree(inode->i_link); - if (!bpf_inode_type(inode, &type)) - bpf_any_put(inode->i_private, type); -} - /* * Display the mount options in /proc/mounts. 
*/ @@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void bpf_destroy_inode_deferred(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + enum bpf_type type; + + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); + free_inode_nonrcu(inode); +} + +static void bpf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); +} + static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .evict_inode = bpf_evict_inode, + .destroy_inode = bpf_destroy_inode, }; enum { -- GitLab From 1762058319f1cf34b11fc6b82678828ecc88c8ee Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Mar 2019 16:34:17 -0700 Subject: [PATCH 0178/1861] MAINTAINERS: Fix file pattern for X86 MCE INFRASTRUCTURE Code restructuring renamed arch/x86/kernel/cpu/mcheck/ to be arch/x86/kernel/cpu/mce/ Update the MAINTAINERS file pattern to account for this change. Fixes: 21afaf181362 ("x86/mce: Streamline MCE subsystem's naming") Reported-by: Joe Perches Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Mauro Carvalho Chehab Cc: Nicolas Ferre Cc: Thomas Gleixner Cc: linux-edac Link: https://lkml.kernel.org/r/20190325233416.GB8869@agluck-desk --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b5480..58acbc246adc8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16910,7 +16910,7 @@ M: Tony Luck M: Borislav Petkov L: linux-edac@vger.kernel.org S: Maintained -F: arch/x86/kernel/cpu/mcheck/* +F: arch/x86/kernel/cpu/mce/* X86 MICROCODE UPDATE SUPPORT M: Borislav Petkov -- GitLab From 76fc276f4a91bdf96940c276398954f291daf034 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Mar 2019 16:29:33 -0700 Subject: [PATCH 0179/1861] MAINTAINERS: Update entry for EDAC-SKYLAKE Code refactoring to share some source code with a new EDAC driver resulted in renaming one file (skx_edac.c became skx_base.c) and adding a new file (skx_common.c). Update the file pattern in MAINTAINERS to take account of this change. Fixes: 98f2fc829e3b ("EDAC, skx_edac: Delete duplicated code") Reported-by: Joe Perches Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "David S. Miller" Cc: Aristeu Rozanski Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Nicolas Ferre Cc: Qiuxu Zhuo Cc: linux-edac Link: https://lkml.kernel.org/r/20190325232932.GA8869@agluck-desk --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b5480..a708bf6a02fd6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5677,7 +5677,7 @@ EDAC-SKYLAKE M: Tony Luck L: linux-edac@vger.kernel.org S: Maintained -F: drivers/edac/skx_edac.c +F: drivers/edac/skx_*.c EDAC-TI M: Tero Kristo -- GitLab From bcc5c1bbf76c40764735a5b3bdb884575e7fd822 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Mar 2019 16:56:27 -0700 Subject: [PATCH 0180/1861] MAINTAINERS: Add entry for EDAC-I10NM Updating the MAINTAINERS file when adding this new driver was forgotten. Fix it. Fixes: d4dc89d069aa ("EDAC, i10nm: Add a driver for Intel 10nm server processors") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "David S. 
Miller" Cc: Aristeu Rozanski Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Nicolas Ferre Cc: Qiuxu Zhuo Cc: linux-edac Link: https://lkml.kernel.org/r/20190325235627.GA11938@agluck-desk --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a708bf6a02fd6..7d1246b62a990 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5596,6 +5596,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/ghes_edac.c +EDAC-I10NM +M: Tony Luck +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/i10nm_base.c + EDAC-I3000 L: linux-edac@vger.kernel.org S: Orphan -- GitLab From dbb2483b2a46fbaf833cfb5deb5ed9cace9c7399 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2019 16:26:19 -0700 Subject: [PATCH 0181/1861] xfrm: clean up xfrm protocol checks In commit 6a53b7593233 ("xfrm: check id proto in validate_tmpl()") I introduced a check for xfrm protocol, but according to Herbert IPSEC_PROTO_ANY should only be used as a wildcard for lookup, so it should be removed from validate_tmpl(). And, IPSEC_PROTO_ANY is expected to only match 3 IPSec-specific protocols, this is why xfrm_state_flush() could still miss IPPROTO_ROUTING, which leads that those entries are left in net->xfrm.state_all before exit net. Fix this by replacing IPSEC_PROTO_ANY with zero. This patch also extracts the check from validate_tmpl() to xfrm_id_proto_valid() and uses it in parse_ipsecrequest(). With this, no other protocols should be added into xfrm. Fixes: 6a53b7593233 ("xfrm: check id proto in validate_tmpl()") Reported-by: syzbot+0bf0519d6e0de15914fe@syzkaller.appspotmail.com Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 17 +++++++++++++++++ net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 4 +++- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 14 +------------- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 85386becbaea2..902437dfbce77 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1404,6 +1404,23 @@ static inline int xfrm_state_kern(const struct xfrm_state *x) return atomic_read(&x->tunnel_users); } +static inline bool xfrm_id_proto_valid(u8 proto) +{ + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + return true; + default: + return false; + } +} + +/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. 
*/ static inline int xfrm_id_proto_match(u8 proto, u8 userproto) { return (!userproto || proto == userproto || diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 12cb3aa990af4..d9e5f6808811a 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) unsigned int i; xfrm_flush_gc(); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5651c29cb5bd0..4af1e1d60b9f2 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1951,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; + if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) + return -EINVAL; - t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */ + t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1bb971f46fc6f..178baaa037e5b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2384,7 +2384,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); flush_work(&xfrm_state_gc_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8d4d52fd457b2..6916931b1de1c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1513,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return -EINVAL; } - switch (ut[i].id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: -#if IS_ENABLED(CONFIG_IPV6) - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: -#endif - case IPSEC_PROTO_ANY: - break; - default: + if (!xfrm_id_proto_valid(ut[i].id.proto)) return -EINVAL; - } - } return 0; -- GitLab From 8dfb4eba4100e7cdd161a8baef2d8d61b7a7e62e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 25 Mar 2019 14:30:00 +0100 Subject: [PATCH 0182/1861] esp4: add length check for UDP encapsulation esp_output_udp_encap can produce a length that doesn't fit in the 16 bits of a UDP header's length field. In that case, we'll send a fragmented packet whose length is larger than IP_MAX_MTU (resulting in "Oversized IP packet" warnings on receive) and with a bogus UDP length. To prevent this, add a length check to esp_output_udp_encap and return -EMSGSIZE on failure. This seems to be older than git history. 
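To make the failure mode concrete, here is a small standalone userspace sketch (illustrative numbers, not kernel code) of what happens without the check: the real length wraps when squeezed into the 16-bit UDP length field, which is exactly what bounding it against IP_MAX_MTU prevents.

	#include <stdio.h>
	#include <stdint.h>

	#define IP_MAX_MTU 0xFFFFU	/* 16-bit IP total-length limit, as in the kernel */

	int main(void)
	{
		unsigned int len = 70000;	   /* oversized ESP-in-UDP payload */
		uint16_t wire_len = (uint16_t)len; /* what htons(len) would put in uh->len */

		printf("real length %u -> on-wire UDP length %u (wrapped)\n",
		       len, wire_len);

		/* The fix: refuse to build the packet instead of wrapping. */
		if (len + 20 /* sizeof(struct iphdr) */ >= IP_MAX_MTU)
			printf("with the fix: esp_output_udp_encap() returns -EMSGSIZE\n");
		return 0;
	}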
Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 10e809b296ec8..fb065a8937ea2 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -226,7 +226,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { int encap_type; struct udphdr *uh; @@ -234,6 +234,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru __be16 sport, dport; struct xfrm_encap_tmpl *encap = x->encap; struct ip_esp_hdr *esph = esp->esph; + unsigned int len; spin_lock_bh(&x->lock); sport = encap->encap_sport; @@ -241,11 +242,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru encap_type = encap->encap_type; spin_unlock_bh(&x->lock); + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len + sizeof(struct iphdr) >= IP_MAX_MTU) + return -EMSGSIZE; + uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len + esp->tailen - - skb_transport_offset(skb)); + uh->len = htons(len); uh->check = 0; switch (encap_type) { @@ -262,6 +266,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru *skb_mac_header(skb) = IPPROTO_UDP; esp->esph = esph; + + return 0; } int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) @@ -275,8 +281,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * int tailen = esp->tailen; /* this is non-NULL only with UDP Encapsulation */ - if (x->encap) - esp_output_udp_encap(x, skb, esp); + if (x->encap) { + int err = esp_output_udp_encap(x, skb, esp); + + if (err < 0) + return err; + } if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { -- GitLab From 36e075ce74ec4e261a638bf09d10b3348ca4d883 Mon Sep 17 00:00:00 2001 From: Jenny TC Date: Sat, 23 Mar 2019 18:40:10 +0530 Subject: [PATCH 0183/1861] ASoC: Intel: Skylake: enable S24_LE format support To enable the S24_LE format, sample_type in the topology fw has to be set to 1. But the sample_type defined in the topology firmware configuration is not getting reflected in the dsp param. This patch sets sample_type in the base config so that the sample type defined in the topology firmware is reflected in the dsp params. This issue was uncovered while debugging the S24_LE format, which requires the MSB byte in each 32-bit word to be skipped. Setting sample_type in the topology firmware to 1 helps to skip that MSB byte. 
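For reference, a standalone sketch of the S24_LE layout in question (sample values invented; an arithmetic right shift is assumed): each 24-bit sample sits in the low three bytes of a 32-bit word, and the top byte carries no audio data, which is what sample_type = 1 tells the DSP here.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t word = 0xAB123456;	/* 0xAB is the MSB byte to be skipped */
		/* keep the low 24 bits and sign-extend them to 32 bits */
		int32_t sample = (int32_t)(word << 8) >> 8;

		printf("raw word 0x%08X -> 24-bit sample 0x%06X\n",
		       word, (unsigned int)(sample & 0xFFFFFF));
		return 0;
	}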
Signed-off-by: Jenny TC Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-messages.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c index 28c4806b196a2..4bf70b4429f03 100644 --- a/sound/soc/intel/skylake/skl-messages.c +++ b/sound/soc/intel/skylake/skl-messages.c @@ -483,6 +483,7 @@ static void skl_set_base_module_format(struct skl_sst *ctx, base_cfg->audio_fmt.bit_depth = format->bit_depth; base_cfg->audio_fmt.valid_bit_depth = format->valid_bit_depth; base_cfg->audio_fmt.ch_cfg = format->ch_cfg; + base_cfg->audio_fmt.sample_type = format->sample_type; dev_dbg(ctx->dev, "bit_depth=%x valid_bd=%x ch_config=%x\n", format->bit_depth, format->valid_bit_depth, -- GitLab From 766460852cfaeca4042e5f3aeb9616b3689147bc Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 25 Mar 2019 15:29:22 -0500 Subject: [PATCH 0184/1861] x86/platform/uv: Fix missing checks of kcalloc() return values Handle potential errors returned from kcalloc(). [ bp: rewrite commit message. ] Signed-off-by: Kangjie Lu Signed-off-by: Borislav Petkov Cc: Andrew Banman Cc: Andy Shevchenko Cc: Colin Ian King Cc: Darren Hart Cc: "Gustavo A. R. Silva" Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kees Cook Cc: Mike Travis Cc: Nicolai Stange Cc: pakki001@umn.edu Cc: platform-driver-x86@vger.kernel.org Cc: Thomas Gleixner Cc: Varsha Rao Cc: x86-ml Link: https://lkml.kernel.org/r/20190325202924.4624-1-kjlu@umn.edu --- arch/x86/platform/uv/tlb_uv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 2c53b0f19329a..1297e185b8c8d 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -2133,14 +2133,19 @@ static int __init summarize_uvhub_sockets(int nuvhubs, */ static int __init init_per_cpu(int nuvhubs, int base_part_pnode) { - unsigned char *uvhub_mask; struct uvhub_desc *uvhub_descs; + unsigned char *uvhub_mask = NULL; if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub()) timeout_us = calculate_destination_timeout(); uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL); + if (!uvhub_descs) + goto fail; + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + if (!uvhub_mask) + goto fail; if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) goto fail; -- GitLab From 7d56bedb2730dc2ea8abf0fd7240ee99ecfee3c9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 10:32:23 -0700 Subject: [PATCH 0185/1861] ARM: dts: Fix dcan clkctrl clock for am3 We must not use legacy clock defines for dts clkctrl clocks as the offsets will be wrong. 
Fixes: 87fc89ced3a7 ("ARM: dts: am335x: Move l4 child devices to probe them with ti-sysc") Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx-l4.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index f459ec316a22d..ca6d9f02a800c 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1762,7 +1762,7 @@ reg = <0xcc000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN0_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -1785,7 +1785,7 @@ reg = <0xd0000 0x4>; reg-names = "rev"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM3_D_CAN1_CLKCTRL 0>; + clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; -- GitLab From 927cb78177ae3773d0d27404566a93cb8e88890c Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Mar 2019 13:58:27 +0100 Subject: [PATCH 0186/1861] bpf: remove incorrect 'verifier bug' warning The BPF verifier checks the maximum number of call stack frames twice, first in the main CFG traversal (do_check) and then in a subsequent traversal (check_max_stack_depth). If the second check fails, it logs a 'verifier bug' warning and errors out, as the number of call stack frames should have been verified already. However, the second check may fail without indicating a verifier bug: if the excessive function calls reside in dead code, the main CFG traversal may not visit them; the subsequent traversal visits all instructions, including dead code. This case raises the question of how invalid dead code should be treated. This patch implements the conservative option and rejects such code. Signed-off-by: Paul Chaignon Tested-by: Xiao Han Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fd502c1f71eb0..6c5a41f7f3385 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1897,8 +1897,9 @@ continue_func: } frame++; if (frame >= MAX_CALL_FRAMES) { - WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); - return -EFAULT; + verbose(env, "the call stack of %d frames is too deep !\n", + frame); + return -E2BIG; } goto process_func; } -- GitLab From cabacfbbe54ec6730b3c147599763892c6c03525 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Mar 2019 13:58:50 +0100 Subject: [PATCH 0187/1861] selftests/bpf: test case for invalid call stack in dead code This patch adds a test case with an excessive number of call stack frames in dead code. 
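To make the dead-code situation concrete, here is a crude user-space model of the two traversals; the toy instruction set and names are invented for illustration and none of this is verifier code. As in the selftest, the branch is always taken, so the call is dead: the path-following pass resolves the branch and never reaches the call, while the linear pass visits every instruction, mirroring how do_check() can miss what check_max_stack_depth() later sees:

	#include <stdio.h>

	enum op { OP_SET0, OP_JEQ0_SKIP1, OP_CALL, OP_EXIT };

	static const enum op prog[] = { OP_SET0, OP_JEQ0_SKIP1, OP_CALL, OP_EXIT };
	#define PROG_LEN (sizeof(prog) / sizeof(prog[0]))

	/* Follows the one feasible path, as a CFG walk with known values would. */
	static int path_pass(void)
	{
		int r1 = -1, calls = 0;
		for (size_t pc = 0; pc < PROG_LEN; pc++) {
			switch (prog[pc]) {
			case OP_SET0: r1 = 0; break;
			case OP_JEQ0_SKIP1: if (r1 == 0) pc++; break; /* skips the call */
			case OP_CALL: calls++; break;
			case OP_EXIT: return calls;
			}
		}
		return calls;
	}

	/* Visits every instruction, dead or not. */
	static int linear_pass(void)
	{
		int calls = 0;
		for (size_t pc = 0; pc < PROG_LEN; pc++)
			if (prog[pc] == OP_CALL)
				calls++;
		return calls;
	}

	int main(void)
	{
		printf("path pass sees %d call(s), linear pass sees %d\n",
		       path_pass(), linear_pass());
		return 0;
	}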
Signed-off-by: Paul Chaignon Tested-by: Xiao Han Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/calls.c | 38 ++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index f2ccae39ee66b..fb11240b758b1 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -907,6 +907,44 @@ .errstr = "call stack", .result = REJECT, }, +{ + "calls: stack depth check in dead code", + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, +}, { "calls: spill into caller stack frame", .insns = { -- GitLab From f52c97d9df983cc38a8809d0910e5eaba0c180b3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 25 Mar 2019 15:12:15 +0100 Subject: [PATCH 0188/1861] bpf, doc: fix BTF docs reflow of bullet list In section 2.2.1 (BTF_KIND_INT), a bullet list was collapsed due to text reflow in commit 9ab5305dbe3f ("docs/btf: reflow text to fill up to 78 characters"). This patch corrects the mistake. Also adjust the next bullet list, which is used for comparison, so that it is rendered the same way. Fixes: 9ab5305dbe3f ("docs/btf: reflow text to fill up to 78 characters") Link: https://www.kernel.org/doc/html/latest/bpf/btf.html#btf-kind-int Signed-off-by: Jesper Dangaard Brouer Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- Documentation/bpf/btf.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 9a60a5d60e380..7313d354f20e6 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -148,16 +148,16 @@ The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()`` for the type. The maximum value of ``BTF_INT_BITS()`` is 128. The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values -for this int. For example, a bitfield struct member has: * btf member bit -offset 100 from the start of the structure, * btf member pointing to an int -type, * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` +for this int. For example, a bitfield struct member has: + * btf member bit offset 100 from the start of the structure, + * btf member pointing to an int type, + * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` Then in the struct memory layout, this member will occupy ``4`` bits starting from bits ``100 + 2 = 102``.
Alternatively, the bitfield struct member can be the following to access the same bits as the above: - * btf member bit offset 102, * btf member pointing to an int type, * the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4`` -- GitLab From 603fadf33604a2e170eb833f99f569d3597f1f09 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2019 13:34:00 -0500 Subject: [PATCH 0189/1861] ACPI: Fix comment typos Fix some misspellings in comments. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_dbg.c | 2 +- drivers/acpi/acpi_lpat.c | 2 +- drivers/acpi/cppc_acpi.c | 34 +++++++++++++------------- drivers/acpi/power.c | 4 ++-- drivers/acpi/pptt.c | 48 ++++++++++++++++++------------------- drivers/acpi/scan.c | 2 +- drivers/acpi/spcr.c | 2 +- drivers/acpi/video_detect.c | 2 +- 8 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c index 4a434c23a1963..d18246a2a65e4 100644 --- a/drivers/acpi/acpi_dbg.c +++ b/drivers/acpi/acpi_dbg.c @@ -390,7 +390,7 @@ again: return size > 0 ? size : ret; } -static int acpi_aml_thread(void *unsed) +static int acpi_aml_thread(void *unused) { acpi_osd_exec_callback function = NULL; void *context; diff --git a/drivers/acpi/acpi_lpat.c b/drivers/acpi/acpi_lpat.c index 2cd9f738812ba..43f1b99c86ca4 100644 --- a/drivers/acpi/acpi_lpat.c +++ b/drivers/acpi/acpi_lpat.c @@ -22,7 +22,7 @@ * LPAT conversion table * * @lpat_table: the temperature_raw mapping table structure - * @raw: the raw value, used as a key to get the temerature from the + * @raw: the raw value, used as a key to get the temperature from the * above mapping table * * A positive converted temperature value will be returned on success, diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 1b207fca1420b..e947a1ec82f9d 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -81,9 +81,9 @@ struct cppc_pcc_data { int refcount; }; -/* Array to represent the PCC channel per subspace id */ +/* Array to represent the PCC channel per subspace ID */ static struct cppc_pcc_data *pcc_data[MAX_PCC_SUBSPACES]; -/* The cpu_pcc_subspace_idx containsper CPU subspace id */ +/* The cpu_pcc_subspace_idx contains per CPU subspace ID */ static DEFINE_PER_CPU(int, cpu_pcc_subspace_idx); /* @@ -436,7 +436,7 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data) return -ENOMEM; /* - * Now that we have _PSD data from all CPUs, lets setup P-state + * Now that we have _PSD data from all CPUs, let's setup P-state * domain info. */ for_each_possible_cpu(i) { @@ -588,7 +588,7 @@ static int register_pcc_channel(int pcc_ss_idx) return -ENOMEM; } - /* Set flag so that we dont come here for each CPU. */ + /* Set flag so that we don't come here for each CPU. */ pcc_data[pcc_ss_idx]->pcc_channel_acquired = true; } @@ -613,7 +613,7 @@ bool __weak cpc_ffh_supported(void) * * Check and allocate the cppc_pcc_data memory. * In some processor configurations it is possible that same subspace - * is shared between multiple CPU's. This is seen especially in CPU's + * is shared between multiple CPUs. This is seen especially in CPUs * with hardware multi-threading support. * * Return: 0 for success, errno for failure @@ -711,7 +711,7 @@ static bool is_cppc_supported(int revision, int num_ent) /** * acpi_cppc_processor_probe - Search for per CPU _CPC objects. - * @pr: Ptr to acpi_processor containing this CPUs logical Id. 
+ * @pr: Ptr to acpi_processor containing this CPU's logical ID. * * Return: 0 for success or negative value for err. */ @@ -728,7 +728,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) acpi_status status; int ret = -EFAULT; - /* Parse the ACPI _CPC table for this cpu. */ + /* Parse the ACPI _CPC table for this CPU. */ status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output, ACPI_TYPE_PACKAGE); if (ACPI_FAILURE(status)) { @@ -840,7 +840,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) if (ret) goto out_free; - /* Register PCC channel once for all PCC subspace id. */ + /* Register PCC channel once for all PCC subspace ID. */ if (pcc_subspace_id >= 0 && !pcc_data[pcc_subspace_id]->pcc_channel_acquired) { ret = register_pcc_channel(pcc_subspace_id); if (ret) @@ -860,7 +860,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) goto out_free; } - /* Plug PSD data into this CPUs CPC descriptor. */ + /* Plug PSD data into this CPU's CPC descriptor. */ per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr; ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj, @@ -891,7 +891,7 @@ EXPORT_SYMBOL_GPL(acpi_cppc_processor_probe); /** * acpi_cppc_processor_exit - Cleanup CPC structs. - * @pr: Ptr to acpi_processor containing this CPUs logical Id. + * @pr: Ptr to acpi_processor containing this CPU's logical ID. * * Return: Void */ @@ -931,7 +931,7 @@ EXPORT_SYMBOL_GPL(acpi_cppc_processor_exit); /** * cpc_read_ffh() - Read FFH register - * @cpunum: cpu number to read + * @cpunum: CPU number to read * @reg: cppc register information * @val: place holder for return value * @@ -946,7 +946,7 @@ int __weak cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) /** * cpc_write_ffh() - Write FFH register - * @cpunum: cpu number to write + * @cpunum: CPU number to write * @reg: cppc register information * @val: value to write * @@ -1093,7 +1093,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf) EXPORT_SYMBOL_GPL(cppc_get_desired_perf); /** - * cppc_get_perf_caps - Get a CPUs performance capabilities. + * cppc_get_perf_caps - Get a CPU's performance capabilities. * @cpunum: CPU from which to get capabilities info. * @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h * @@ -1178,7 +1178,7 @@ out_err: EXPORT_SYMBOL_GPL(cppc_get_perf_caps); /** - * cppc_get_perf_ctrs - Read a CPUs performance feedback counters. + * cppc_get_perf_ctrs - Read a CPU's performance feedback counters. * @cpunum: CPU from which to read counters. * @perf_fb_ctrs: ptr to cppc_perf_fb_ctrs. See cppc_acpi.h * @@ -1205,7 +1205,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) ctr_wrap_reg = &cpc_desc->cpc_regs[CTR_WRAP_TIME]; /* - * If refernce perf register is not supported then we should + * If reference perf register is not supported then we should * use the nominal perf value */ if (!CPC_SUPPORTED(ref_perf_reg)) @@ -1258,7 +1258,7 @@ out_err: EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs); /** - * cppc_set_perf - Set a CPUs performance controls. + * cppc_set_perf - Set a CPU's performance controls. * @cpu: CPU for which to set performance controls. * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h * @@ -1339,7 +1339,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) * executing the Phase-II. * 2. Some other CPU has beaten this CPU to successfully execute the * write_trylock and has already acquired the write_lock. 
We know for a - * fact it(other CPU acquiring the write_lock) couldn't have happened + * fact it (other CPU acquiring the write_lock) couldn't have happened * before this CPU's Phase-I as we held the read_lock. * 3. Some other CPU executing pcc CMD_READ has stolen the * down_write, in which case, send_pcc_cmd will check for pending diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 665e93ca0b40f..87db3e124725e 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -535,12 +535,12 @@ int acpi_device_sleep_wake(struct acpi_device *dev, /* * Try to execute _DSW first. * - * Three agruments are needed for the _DSW object: + * Three arguments are needed for the _DSW object: * Argument 0: enable/disable the wake capabilities * Argument 1: target system state * Argument 2: target device state * When _DSW object is called to disable the wake capabilities, maybe - * the first argument is filled. The values of the other two agruments + * the first argument is filled. The values of the other two arguments * are meaningless. */ in_arg[0].type = ACPI_TYPE_INTEGER; diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 065c4fc245d11..b72e6afaa8fb9 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -164,7 +164,7 @@ static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *t } /** - * acpi_count_levels() - Given a PPTT table, and a cpu node, count the caches + * acpi_count_levels() - Given a PPTT table, and a CPU node, count the caches * @table_hdr: Pointer to the head of the PPTT table * @cpu_node: processor node we wish to count caches for * @@ -235,7 +235,7 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, /** * acpi_find_processor_node() - Given a PPTT table find the requested processor * @table_hdr: Pointer to the head of the PPTT table - * @acpi_cpu_id: cpu we are searching for + * @acpi_cpu_id: CPU we are searching for * * Find the subtable entry describing the provided processor. * This is done by iterating the PPTT table looking for processor nodes @@ -456,21 +456,21 @@ static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_ta static void acpi_pptt_warn_missing(void) { - pr_warn_once("No PPTT table found, cpu and cache topology may be inaccurate\n"); + pr_warn_once("No PPTT table found, CPU and cache topology may be inaccurate\n"); } /** * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature * @table: Pointer to the head of the PPTT table - * @cpu: Kernel logical cpu number + * @cpu: Kernel logical CPU number * @level: A level that terminates the search * @flag: A flag which terminates the search * - * Get a unique value given a cpu, and a topology level, that can be + * Get a unique value given a CPU, and a topology level, that can be * matched to determine which cpus share common topological features * at that level. 
* - * Return: Unique value, or -ENOENT if unable to locate cpu + * Return: Unique value, or -ENOENT if unable to locate CPU */ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, unsigned int cpu, int level, int flag) @@ -510,7 +510,7 @@ static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag) return -ENOENT; } retval = topology_get_acpi_cpu_tag(table, cpu, level, flag); - pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n", + pr_debug("Topology Setup ACPI CPU %d, level %d ret = %d\n", cpu, level, retval); acpi_put_table(table); @@ -519,9 +519,9 @@ static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag) /** * acpi_find_last_cache_level() - Determines the number of cache levels for a PE - * @cpu: Kernel logical cpu number + * @cpu: Kernel logical CPU number * - * Given a logical cpu number, returns the number of levels of cache represented + * Given a logical CPU number, returns the number of levels of cache represented * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0 * indicating we didn't find any cache levels. * @@ -534,7 +534,7 @@ int acpi_find_last_cache_level(unsigned int cpu) int number_of_levels = 0; acpi_status status; - pr_debug("Cache Setup find last level cpu=%d\n", cpu); + pr_debug("Cache Setup find last level CPU=%d\n", cpu); acpi_cpu_id = get_acpi_id_for_cpu(cpu); status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); @@ -551,14 +551,14 @@ int acpi_find_last_cache_level(unsigned int cpu) /** * cache_setup_acpi() - Override CPU cache topology with data from the PPTT - * @cpu: Kernel logical cpu number + * @cpu: Kernel logical CPU number * * Updates the global cache info provided by cpu_get_cacheinfo() * when there are valid properties in the acpi_pptt_cache nodes. A * successful parse may not result in any updates if none of the - * cache levels have any valid flags set. Futher, a unique value is + * cache levels have any valid flags set. Further, a unique value is * associated with each known CPU cache entry. This unique value - * can be used to determine whether caches are shared between cpus. + * can be used to determine whether caches are shared between CPUs. * * Return: -ENOENT on failure to find table, or 0 on success */ @@ -567,7 +567,7 @@ int cache_setup_acpi(unsigned int cpu) struct acpi_table_header *table; acpi_status status; - pr_debug("Cache Setup ACPI cpu %d\n", cpu); + pr_debug("Cache Setup ACPI CPU %d\n", cpu); status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); if (ACPI_FAILURE(status)) { @@ -582,8 +582,8 @@ int cache_setup_acpi(unsigned int cpu) } /** - * find_acpi_cpu_topology() - Determine a unique topology value for a given cpu - * @cpu: Kernel logical cpu number + * find_acpi_cpu_topology() - Determine a unique topology value for a given CPU + * @cpu: Kernel logical CPU number * @level: The topological level for which we would like a unique ID * * Determine a topology unique ID for each thread/core/cluster/mc_grouping @@ -596,7 +596,7 @@ int cache_setup_acpi(unsigned int cpu) * other levels beyond this use a generated value to uniquely identify * a topological feature. * - * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found. * Otherwise returns a value which represents a unique topological feature. 
*/ int find_acpi_cpu_topology(unsigned int cpu, int level) @@ -606,12 +606,12 @@ int find_acpi_cpu_topology(unsigned int cpu, int level) /** * find_acpi_cpu_cache_topology() - Determine a unique cache topology value - * @cpu: Kernel logical cpu number + * @cpu: Kernel logical CPU number * @level: The cache level for which we would like a unique ID * * Determine a unique ID for each unified cache in the system * - * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found. * Otherwise returns a value which represents a unique topological feature. */ int find_acpi_cpu_cache_topology(unsigned int cpu, int level) @@ -643,17 +643,17 @@ int find_acpi_cpu_cache_topology(unsigned int cpu, int level) /** - * find_acpi_cpu_topology_package() - Determine a unique cpu package value - * @cpu: Kernel logical cpu number + * find_acpi_cpu_topology_package() - Determine a unique CPU package value + * @cpu: Kernel logical CPU number * - * Determine a topology unique package ID for the given cpu. + * Determine a topology unique package ID for the given CPU. * This ID can then be used to group peers, which will have matching ids. * * The search terminates when either a level is found with the PHYSICAL_PACKAGE * flag set or we reach a root node. * - * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. - * Otherwise returns a value which represents the package for this cpu. + * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found. + * Otherwise returns a value which represents the package for this CPU. */ int find_acpi_cpu_topology_package(unsigned int cpu) { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 446c959a8f082..1b66fc835e39c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -895,7 +895,7 @@ static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device) /* * Call _PSW/_DSW object to disable its ability to wake the sleeping * system for the ACPI device with the _PRW object. - * The _PSW object is depreciated in ACPI 3.0 and is replaced by _DSW. + * The _PSW object is deprecated in ACPI 3.0 and is replaced by _DSW. * So it is necessary to call _DSW object first. Only when it is not * present will the _PSW object used. */ diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index c336784d0bcbe..b34d05e365b76 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -28,7 +28,7 @@ EXPORT_SYMBOL(qdf2400_e44_present); /* * Some Qualcomm Datacenter Technologies SoCs have a defective UART BUSY bit. - * Detect them by examining the OEM fields in the SPCR header, similiar to PCI + * Detect them by examining the OEM fields in the SPCR header, similar to PCI * quirk detection in pci_mcfg.c. */ static bool qdf2400_erratum_44_present(struct acpi_table_header *h) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 43587ac680e47..cc2888930fe97 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -112,7 +112,7 @@ static int video_detect_force_none(const struct dmi_system_id *d) static const struct dmi_system_id video_detect_dmi_table[] = { /* On Samsung X360, the BIOS will set a flag (VDRV) if generic * ACPI backlight device is used. This flag will definitively break - * the backlight interface (even the vendor interface) untill next + * the backlight interface (even the vendor interface) until next * reboot. 
It's why we should prevent video.ko from being used here * and we can't rely on a later call to acpi_video_unregister(). */ -- GitLab From 40381a3c1fa3ed37458c7f745e51fc81e9b48fe2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2019 13:34:02 -0500 Subject: [PATCH 0190/1861] ACPI / scan: Simplify acpi_bus_extract_wakeup_device_power_package() acpi_bus_extract_wakeup_device_power_package() is a static function with a single caller that supplies (device->handle, &device->wakeup). Simplify the interface so the caller need only supply "device". This makes it obvious that "wakeup", i.e., &device->wakeup, can never be NULL, so remove the unnecessary check for that. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 446c959a8f082..f3fb1fa79429f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -763,18 +763,16 @@ acpi_bus_get_ejd(acpi_handle handle, acpi_handle *ejd) } EXPORT_SYMBOL_GPL(acpi_bus_get_ejd); -static int acpi_bus_extract_wakeup_device_power_package(acpi_handle handle, - struct acpi_device_wakeup *wakeup) +static int acpi_bus_extract_wakeup_device_power_package(struct acpi_device *dev) { + acpi_handle handle = dev->handle; + struct acpi_device_wakeup *wakeup = &dev->wakeup; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *package = NULL; union acpi_object *element = NULL; acpi_status status; int err = -ENODATA; - if (!wakeup) - return -EINVAL; - INIT_LIST_HEAD(&wakeup->resources); /* _PRW */ @@ -883,8 +881,7 @@ static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device) if (!acpi_has_method(device->handle, "_PRW")) return; - err = acpi_bus_extract_wakeup_device_power_package(device->handle, - &device->wakeup); + err = acpi_bus_extract_wakeup_device_power_package(device); if (err) { dev_err(&device->dev, "_PRW evaluation error: %d\n", err); return; -- GitLab From 5ceb5f0522bdc20d507fef2cd77fec2caa4e541e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2019 13:34:03 -0500 Subject: [PATCH 0191/1861] ACPI / scan: Add labels for PNP button devices Subsequent code treats button_device_ids[] entries differently, and it's hard to follow without a hint as to which is which. Add comments to identify the power button, lid, and sleep button devices. The "PNP" prefix is owned by Microsoft, so they distribute the canonical list of "PNP" IDs. Link: https://uefi.org/PNP_ACPI_Registry Link: https://download.microsoft.com/download/1/6/1/161ba512-40e2-4cc9-843a-923143f3456c/devids.txt Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index f3fb1fa79429f..2297440a06228 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -846,9 +846,9 @@ static int acpi_bus_extract_wakeup_device_power_package(struct acpi_device *dev) static bool acpi_wakeup_gpe_init(struct acpi_device *device) { static const struct acpi_device_id button_device_ids[] = { - {"PNP0C0C", 0}, - {"PNP0C0D", 0}, - {"PNP0C0E", 0}, + {"PNP0C0C", 0}, /* Power button */ + {"PNP0C0D", 0}, /* Lid */ + {"PNP0C0E", 0}, /* Sleep button */ {"", 0}, }; struct acpi_device_wakeup *wakeup = &device->wakeup; -- GitLab From 4fea6ef0b219d66b8a901fea1744745a1ed2f79b Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 9 Jan 2019 14:48:09 -0800 Subject: [PATCH 0192/1861] doc: Remove obsolete RCU update functions from RCU documentation Now that synchronize_rcu_bh, synchronize_rcu_bh_expedited, call_rcu_bh, rcu_barrier_bh, synchronize_sched, synchronize_sched_expedited, call_rcu_sched, rcu_barrier_sched, get_state_synchronize_sched, and cond_synchronize_sched are obsolete, let's remove them from the documentation aside from a small historical section. Signed-off-by: Paul E. McKenney --- .../Data-Structures/Data-Structures.html | 3 +- .../Expedited-Grace-Periods.html | 4 +- .../Tree-RCU-Memory-Ordering.html | 5 +- Documentation/RCU/NMI-RCU.txt | 13 ++-- Documentation/RCU/UP.txt | 6 +- Documentation/RCU/checklist.txt | 76 +++++++++---------- Documentation/RCU/rcu.txt | 8 +- Documentation/RCU/rcubarrier.txt | 27 ++++--- 8 files changed, 66 insertions(+), 76 deletions(-) diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 18f1798075633..c30c1957c7e6b 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -155,8 +155,7 @@ keeping lock contention under control at all tree levels regardless of the level of loading on the system.

RCU updaters wait for normal grace periods by registering -RCU callbacks, either directly via call_rcu() and -friends (namely call_rcu_bh() and call_rcu_sched()), +RCU callbacks, either directly via call_rcu() or indirectly via synchronize_rcu() and friends. RCU callbacks are represented by rcu_head structures, which are queued on rcu_data structures while they are diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html index 19e7a5fb6b739..57300db4b5ff6 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html @@ -56,6 +56,7 @@ sections. RCU-preempt Expedited Grace Periods

+CONFIG_PREEMPT=y kernels implement RCU-preempt. The overall flow of the handling of a given CPU by an RCU-preempt expedited grace period is shown in the following diagram: @@ -139,6 +140,7 @@ or offline, among other things. RCU-sched Expedited Grace Periods

+CONFIG_PREEMPT=n kernels implement RCU-sched. The overall flow of the handling of a given CPU by an RCU-sched expedited grace period is shown in the following diagram: @@ -146,7 +148,7 @@ expedited grace period is shown in the following diagram:

As with RCU-preempt, RCU-sched's -synchronize_sched_expedited() ignores offline and +synchronize_rcu_expedited() ignores offline and idle CPUs, again because they are in remotely detectable quiescent states. However, because the diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html index 8d21af02b1f07..c64f8d26609fb 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html @@ -34,12 +34,11 @@ Similarly, any code that happens before the beginning of a given RCU grace period is guaranteed to see the effects of all accesses following the end of that grace period that are within RCU read-side critical sections. -

This guarantee is particularly pervasive for synchronize_sched(), -for which RCU-sched read-side critical sections include any region +

Note well that RCU-sched read-side critical sections include any region of code for which preemption is disabled. Given that each individual machine instruction can be thought of as an extremely small region of preemption-disabled code, one can think of -synchronize_sched() as smp_mb() on steroids. +synchronize_rcu() as smp_mb() on steroids.

RCU updaters use this guarantee by splitting their updates into two phases, one of which is executed before the grace period and diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index 687777f83b237..881353fd5bff1 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt @@ -81,18 +81,19 @@ currently executing on some other CPU. We therefore cannot free up any data structures used by the old NMI handler until execution of it completes on all other CPUs. -One way to accomplish this is via synchronize_sched(), perhaps as +One way to accomplish this is via synchronize_rcu(), perhaps as follows: unset_nmi_callback(); - synchronize_sched(); + synchronize_rcu(); kfree(my_nmi_data); -This works because synchronize_sched() blocks until all CPUs complete -any preemption-disabled segments of code that they were executing. -Since NMI handlers disable preemption, synchronize_sched() is guaranteed +This works because (as of v4.20) synchronize_rcu() blocks until all +CPUs complete any preemption-disabled segments of code that they were +executing. +Since NMI handlers disable preemption, synchronize_rcu() is guaranteed not to return until all ongoing NMI handlers exit. It is therefore safe -to free up the handler's data as soon as synchronize_sched() returns. +to free up the handler's data as soon as synchronize_rcu() returns. Important note: for this to work, the architecture in question must invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively. diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt index 90ec5341ee981..53bde717017bb 100644 --- a/Documentation/RCU/UP.txt +++ b/Documentation/RCU/UP.txt @@ -86,10 +86,8 @@ even on a UP system. So do not do it! Even on a UP system, the RCU infrastructure -must- respect grace periods, and -must- invoke callbacks from a known environment in which no locks are held. -It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return -immediately on an UP system. It is also safe for synchronize_rcu() -to return immediately on UP systems, except when running preemptable -RCU. +Note that it -is- safe for synchronize_rcu() to return immediately on +UP systems, including !PREEMPT SMP builds running on UP systems. Quick Quiz #3: Why can't synchronize_rcu() return immediately on UP systems running preemptable RCU? diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 6f469864d9f59..fcc59fea5cd4f 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -182,16 +182,13 @@ over a rather long period of time, but improvements are always welcome! when publicizing a pointer to a structure that can be traversed by an RCU read-side critical section. -5. If call_rcu(), or a related primitive such as call_rcu_bh(), - call_rcu_sched(), or call_srcu() is used, the callback function - will be called from softirq context. In particular, it cannot - block. +5. If call_rcu() or call_srcu() is used, the callback function will + be called from softirq context. In particular, it cannot block. -6. Since synchronize_rcu() can block, it cannot be called from - any sort of irq context. The same rule applies for - synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(), - synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(), - synchronize_sched_expedite(), and synchronize_srcu_expedited(). +6. Since synchronize_rcu() can block, it cannot be called + from any sort of irq context. 
The same rule applies + for synchronize_srcu(), synchronize_rcu_expedited(), and + synchronize_srcu_expedited(). The expedited forms of these primitives have the same semantics as the non-expedited forms, but expediting is both expensive and @@ -212,20 +209,20 @@ over a rather long period of time, but improvements are always welcome! of the system, especially to real-time workloads running on the rest of the system. -7. If the updater uses call_rcu() or synchronize_rcu(), then the - corresponding readers must use rcu_read_lock() and - rcu_read_unlock(). If the updater uses call_rcu_bh() or - synchronize_rcu_bh(), then the corresponding readers must - use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the - updater uses call_rcu_sched() or synchronize_sched(), then - the corresponding readers must disable preemption, possibly - by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu() or call_srcu(), then - the corresponding readers must use srcu_read_lock() and +7. As of v4.20, a given kernel implements only one RCU flavor, + which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y. + If the updater uses call_rcu() or synchronize_rcu(), + then the corresponding readers my use rcu_read_lock() and + rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), + or any pair of primitives that disables and re-enables preemption, + for example, rcu_read_lock_sched() and rcu_read_unlock_sched(). + If the updater uses synchronize_srcu() or call_srcu(), + then the corresponding readers must use srcu_read_lock() and srcu_read_unlock(), and with the same srcu_struct. The rules for the expedited primitives are the same as for their non-expedited counterparts. Mixing things up will result in confusion and - broken kernels. + broken kernels, and has even resulted in an exploitable security + issue. One exception to this rule: rcu_read_lock() and rcu_read_unlock() may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() @@ -288,8 +285,7 @@ over a rather long period of time, but improvements are always welcome! d. Periodically invoke synchronize_rcu(), permitting a limited number of updates per grace period. - The same cautions apply to call_rcu_bh(), call_rcu_sched(), - call_srcu(), and kfree_rcu(). + The same cautions apply to call_srcu() and kfree_rcu(). Note that although these primitives do take action to avoid memory exhaustion when any given CPU has too many callbacks, a determined @@ -336,12 +332,12 @@ over a rather long period of time, but improvements are always welcome! to safely access and/or modify that data structure. RCU callbacks are -usually- executed on the same CPU that executed - the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(), - but are by -no- means guaranteed to be. For example, if a given - CPU goes offline while having an RCU callback pending, then that - RCU callback will execute on some surviving CPU. (If this was - not the case, a self-spawning RCU callback would prevent the - victim CPU from ever going offline.) + the corresponding call_rcu() or call_srcu(). but are by -no- + means guaranteed to be. For example, if a given CPU goes offline + while having an RCU callback pending, then that RCU callback + will execute on some surviving CPU. (If this was not the case, + a self-spawning RCU callback would prevent the victim CPU from + ever going offline.) 13. 
Unlike other forms of RCU, it -is- permissible to block in an SRCU read-side critical section (demarked by srcu_read_lock() @@ -381,8 +377,7 @@ over a rather long period of time, but improvements are always welcome! SRCU's expedited primitive (synchronize_srcu_expedited()) never sends IPIs to other CPUs, so it is easier on - real-time workloads than is synchronize_rcu_expedited(), - synchronize_rcu_bh_expedited() or synchronize_sched_expedited(). + real-time workloads than is synchronize_rcu_expedited(). Note that rcu_dereference() and rcu_assign_pointer() relate to SRCU just as they do to other forms of RCU. @@ -428,22 +423,19 @@ over a rather long period of time, but improvements are always welcome! These debugging aids can help you find problems that are otherwise extremely difficult to spot. -17. If you register a callback using call_rcu(), call_rcu_bh(), - call_rcu_sched(), or call_srcu(), and pass in a function defined - within a loadable module, then it in necessary to wait for - all pending callbacks to be invoked after the last invocation - and before unloading that module. Note that it is absolutely - -not- sufficient to wait for a grace period! The current (say) - synchronize_rcu() implementation waits only for all previous - callbacks registered on the CPU that synchronize_rcu() is running - on, but it is -not- guaranteed to wait for callbacks registered - on other CPUs. +17. If you register a callback using call_rcu() or call_srcu(), + and pass in a function defined within a loadable module, + then it in necessary to wait for all pending callbacks to + be invoked after the last invocation and before unloading + that module. Note that it is absolutely -not- sufficient to + wait for a grace period! The current (say) synchronize_rcu() + implementation waits only for all previous callbacks registered + on the CPU that synchronize_rcu() is running on, but it is -not- + guaranteed to wait for callbacks registered on other CPUs. You instead need to use one of the barrier functions: o call_rcu() -> rcu_barrier() - o call_rcu_bh() -> rcu_barrier() - o call_rcu_sched() -> rcu_barrier() o call_srcu() -> srcu_barrier() However, these barrier functions are absolutely -not- guaranteed diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 721b3e4265155..c818cf65c5a9a 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt @@ -52,10 +52,10 @@ o If I am running on a uniprocessor kernel, which can only do one o How can I see where RCU is currently used in the Linux kernel? Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu", - "rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh", - "srcu_read_lock", "srcu_read_unlock", "synchronize_rcu", - "synchronize_net", "synchronize_srcu", and the other RCU - primitives. Or grab one of the cscope databases from: + "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock", + "srcu_read_unlock", "synchronize_rcu", "synchronize_net", + "synchronize_srcu", and the other RCU primitives. Or grab one + of the cscope databases from: http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt index 5d7759071a3ed..a2782df697328 100644 --- a/Documentation/RCU/rcubarrier.txt +++ b/Documentation/RCU/rcubarrier.txt @@ -83,16 +83,15 @@ Pseudo-code using rcu_barrier() is as follows: 2. Execute rcu_barrier(). 3. Allow the module to be unloaded. 
-There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier() -functions for the other flavors of RCU, and you of course must match -the flavor of rcu_barrier() with that of call_rcu(). If your module -uses multiple flavors of call_rcu(), then it must also use multiple +There is also an srcu_barrier() function for SRCU, and you of course +must match the flavor of rcu_barrier() with that of call_rcu(). If your +module uses multiple flavors of call_rcu(), then it must also use multiple flavors of rcu_barrier() when unloading that module. For example, if -it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on +it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on srcu_struct_2(), then the following three lines of code will be required when unloading: - 1 rcu_barrier_bh(); + 1 rcu_barrier(); 2 srcu_barrier(&srcu_struct_1); 3 srcu_barrier(&srcu_struct_2); @@ -185,12 +184,12 @@ module invokes call_rcu() from timers, you will need to first cancel all the timers, and only then invoke rcu_barrier() to wait for any remaining RCU callbacks to complete. -Of course, if you module uses call_rcu_bh(), you will need to invoke -rcu_barrier_bh() before unloading. Similarly, if your module uses -call_rcu_sched(), you will need to invoke rcu_barrier_sched() before -unloading. If your module uses call_rcu(), call_rcu_bh(), -and- -call_rcu_sched(), then you will need to invoke each of rcu_barrier(), -rcu_barrier_bh(), and rcu_barrier_sched(). +Of course, if you module uses call_rcu(), you will need to invoke +rcu_barrier() before unloading. Similarly, if your module uses +call_srcu(), you will need to invoke srcu_barrier() before unloading, +and on the same srcu_struct structure. If your module uses call_rcu() +-and- call_srcu(), then you will need to invoke rcu_barrier() -and- +srcu_barrier(). Implementing rcu_barrier() @@ -223,8 +222,8 @@ shown below. Note that the final "1" in on_each_cpu()'s argument list ensures that all the calls to rcu_barrier_func() will have completed before on_each_cpu() returns. Line 9 then waits for the completion. -This code was rewritten in 2008 to support rcu_barrier_bh() and -rcu_barrier_sched() in addition to the original rcu_barrier(). +This code was rewritten in 2008 and several times thereafter, but this +still gives the general idea. The rcu_barrier_func() runs on each CPU, where it invokes call_rcu() to post an RCU callback, as follows: -- GitLab From 0fa201d1618e0b39a60b1045aa1b323f9d351721 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 29 Jan 2019 15:05:46 -0700 Subject: [PATCH 0193/1861] doc: Repair some whitespace damage A diagram in whatisRCU.txt has a space character before tabs. This commit therefore makes this diagram consistent with elsewhere in the document: Use one leading tab, followed by spaces for any additional whitespace required. Signed-off-by: Tycho Andersen Signed-off-by: Paul E. McKenney --- Documentation/RCU/whatisRCU.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 1ace20815bb1c..981651a8b65d2 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -310,7 +310,7 @@ reader, updater, and reclaimer. rcu_assign_pointer() - +--------+ + +--------+ +---------------------->| reader |---------+ | +--------+ | | | |
| | | rcu_read_lock() | | | rcu_read_unlock() | rcu_dereference() | | - +---------+ | | - | updater |<---------------------+ | - +---------+ V + +---------+ | | + | updater |<----------------+ | + +---------+ V | +-----------+ +----------------------------------->| reclaimer | - +-----------+ + +-----------+ Defer: synchronize_rcu() & call_rcu() -- GitLab From d1b493bbe101d85c86970f1b6c0401d4c1c473ed Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Feb 2019 07:51:24 -0800 Subject: [PATCH 0194/1861] doc: Describe choice of rcu_dereference() APIs and __rcu usage Reported-by: Andrew Morton Signed-off-by: Paul E. McKenney --- Documentation/RCU/rcu_dereference.txt | 103 ++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt index ab96227bad426..bf699e8cfc75c 100644 --- a/Documentation/RCU/rcu_dereference.txt +++ b/Documentation/RCU/rcu_dereference.txt @@ -351,3 +351,106 @@ garbage values. In short, rcu_dereference() is -not- optional when you are going to dereference the resulting pointer. + + +WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE? + +First, please avoid using rcu_dereference_raw() and also please avoid +using rcu_dereference_check() and rcu_dereference_protected() with a +second argument with a constant value of 1 (or true, for that matter). +With that caution out of the way, here is some guidance for which +member of the rcu_dereference() to use in various situations: + +1. If the access needs to be within an RCU read-side critical + section, use rcu_dereference(). With the new consolidated + RCU flavors, an RCU read-side critical section is entered + using rcu_read_lock(), anything that disables bottom halves, + anything that disables interrupts, or anything that disables + preemption. + +2. If the access might be within an RCU read-side critical section + on the one hand, or protected by (say) my_lock on the other, + use rcu_dereference_check(), for example: + + p1 = rcu_dereference_check(p->rcu_protected_pointer, + lockdep_is_held(&my_lock)); + + +3. If the access might be within an RCU read-side critical section + on the one hand, or protected by either my_lock or your_lock on + the other, again use rcu_dereference_check(), for example: + + p1 = rcu_dereference_check(p->rcu_protected_pointer, + lockdep_is_held(&my_lock) || + lockdep_is_held(&your_lock)); + +4. If the access is on the update side, so that it is always protected + by my_lock, use rcu_dereference_protected(): + + p1 = rcu_dereference_protected(p->rcu_protected_pointer, + lockdep_is_held(&my_lock)); + + This can be extended to handle multiple locks as in #3 above, + and both can be extended to check other conditions as well. + +5. If the protection is supplied by the caller, and is thus unknown + to this code, that is the rare case when rcu_dereference_raw() + is appropriate. In addition, rcu_dereference_raw() might be + appropriate when the lockdep expression would be excessively + complex, except that a better approach in that case might be to + take a long hard look at your synchronization design. Still, + there are data-locking cases where any one of a very large number + of locks or reference counters suffices to protect the pointer, + so rcu_dereference_raw() does have its place. + + However, its place is probably quite a bit smaller than one + might expect given the number of uses in the current kernel. + Ditto for its synonym, rcu_dereference_check( ... 
, 1), and + its close relative, rcu_dereference_protected(... , 1). + + +SPARSE CHECKING OF RCU-PROTECTED POINTERS + +The sparse static-analysis tool checks for direct access to RCU-protected +pointers, which can result in "interesting" bugs due to compiler +optimizations involving invented loads and perhaps also load tearing. +For example, suppose someone mistakenly does something like this: + + p = q->rcu_protected_pointer; + do_something_with(p->a); + do_something_else_with(p->b); + +If register pressure is high, the compiler might optimize "p" out +of existence, transforming the code to something like this: + + do_something_with(q->rcu_protected_pointer->a); + do_something_else_with(q->rcu_protected_pointer->b); + +This could fatally disappoint your code if q->rcu_protected_pointer +changed in the meantime. Nor is this a theoretical problem: Exactly +this sort of bug cost Paul E. McKenney (and several of his innocent +colleagues) a three-day weekend back in the early 1990s. + +Load tearing could of course result in dereferencing a mashup of a pair +of pointers, which also might fatally disappoint your code. + +These problems could have been avoided simply by making the code instead +read as follows: + + p = rcu_dereference(q->rcu_protected_pointer); + do_something_with(p->a); + do_something_else_with(p->b); + +Unfortunately, these sorts of bugs can be extremely hard to spot during +review. This is where the sparse tool comes into play, along with the +"__rcu" marker. If you mark a pointer declaration, whether in a structure +or as a formal parameter, with "__rcu", which tells sparse to complain if +this pointer is accessed directly. It will also cause sparse to complain +if a pointer not marked with "__rcu" is accessed using rcu_dereference() +and friends. For example, ->rcu_protected_pointer might be declared as +follows: + + struct foo __rcu *rcu_protected_pointer; + +Use of "__rcu" is opt-in. If you choose not to use it, then you should +ignore the sparse warnings. -- GitLab From 884b429ae66758bd2e006dc5fd56757e0fde896d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Mar 2019 11:24:35 -0800 Subject: [PATCH 0195/1861] doc: Fix typos and otherwise modernize checklist.txt This commit fixes some issues with Documentation/RCU/checklist.txt. Signed-off-by: Paul E. McKenney --- Documentation/RCU/checklist.txt | 43 +++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index fcc59fea5cd4f..e98ff261a438b 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -318,7 +318,7 @@ over a rather long period of time, but improvements are always welcome! 11. Any lock acquired by an RCU callback must be acquired elsewhere with softirq disabled, e.g., via spin_lock_irqsave(), - spin_lock_bh(), etc. Failing to disable irq on a given + spin_lock_bh(), etc. Failing to disable softirq on a given acquisition of that lock will result in deadlock as soon as the RCU softirq handler happens to run your RCU callback while interrupting that acquisition's critical section. @@ -331,13 +331,16 @@ over a rather long period of time, but improvements are always welcome! must use whatever locking or other synchronization is required to safely access and/or modify that data structure. - RCU callbacks are -usually- executed on the same CPU that executed - the corresponding call_rcu() or call_srcu(). but are by -no- - means guaranteed to be. 
For example, if a given CPU goes offline - while having an RCU callback pending, then that RCU callback - will execute on some surviving CPU. (If this was not the case, - a self-spawning RCU callback would prevent the victim CPU from - ever going offline.) + Do not assume that RCU callbacks will be executed on the same + CPU that executed the corresponding call_rcu() or call_srcu(). + For example, if a given CPU goes offline while having an RCU + callback pending, then that RCU callback will execute on some + surviving CPU. (If this was not the case, a self-spawning RCU + callback would prevent the victim CPU from ever going offline.) + Furthermore, CPUs designated by rcu_nocbs= might well -always- + have their RCU callbacks executed on some other CPUs, in fact, + for some real-time workloads, this is the whole point of using + the rcu_nocbs= kernel boot parameter. 13. Unlike other forms of RCU, it -is- permissible to block in an SRCU read-side critical section (demarked by srcu_read_lock() @@ -379,8 +382,9 @@ over a rather long period of time, but improvements are always welcome! never sends IPIs to other CPUs, so it is easier on real-time workloads than is synchronize_rcu_expedited(). - Note that rcu_dereference() and rcu_assign_pointer() relate to - SRCU just as they do to other forms of RCU. + Note that rcu_assign_pointer() relates to SRCU just as it does to + other forms of RCU, but instead of rcu_dereference() you should + use srcu_dereference() in order to avoid lockdep splats. 14. The whole point of call_rcu(), synchronize_rcu(), and friends is to wait until all pre-existing readers have finished before @@ -400,6 +404,9 @@ over a rather long period of time, but improvements are always welcome! read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. + For SRCU readers, you can use smp_mb__after_srcu_read_unlock() + immediately after an srcu_read_unlock() to get a full barrier. + 16. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the __rcu sparse checks to validate your RCU code. These can help find problems as follows: @@ -423,15 +430,15 @@ over a rather long period of time, but improvements are always welcome! These debugging aids can help you find problems that are otherwise extremely difficult to spot. -17. If you register a callback using call_rcu() or call_srcu(), - and pass in a function defined within a loadable module, - then it in necessary to wait for all pending callbacks to - be invoked after the last invocation and before unloading - that module. Note that it is absolutely -not- sufficient to - wait for a grace period! The current (say) synchronize_rcu() - implementation waits only for all previous callbacks registered - on the CPU that synchronize_rcu() is running on, but it is -not- +17. If you register a callback using call_rcu() or call_srcu(), and + pass in a function defined within a loadable module, then it in + necessary to wait for all pending callbacks to be invoked after + the last invocation and before unloading that module. Note that + it is absolutely -not- sufficient to wait for a grace period! + The current (say) synchronize_rcu() implementation is -not- guaranteed to wait for callbacks registered on other CPUs. + Or even on the current CPU if that CPU recently went offline + and came back online. You instead need to use one of the barrier functions: -- GitLab From e85e6a21b2b5f31148cc3f2e785262b37c3e1ec7 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 10 Jan 2019 15:30:15 -0800 Subject: [PATCH 0196/1861] rcu: Unconditionally expedite during suspend/hibernate The rcu_pm_notify() function refuses to switch to/from expedited grace periods on systems with more than 256 CPUs due to the serialized initialization of expedited grace periods. However, expedited grace periods are now initialized in parallel, removing this concern. This commit therefore removes the checks from rcu_pm_notify(), so that expedited grace periods are used unconditionally during suspend/resume and hibernate/wake operations. As always, real-time workloads wishing to completely avoid expedited grace periods can use the rcupdate.rcu_normal= kernel parameter. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf9..95e3250b7b6e9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3559,13 +3559,11 @@ static int rcu_pm_notify(struct notifier_block *self, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: - if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ - rcu_expedite_gp(); + rcu_expedite_gp(); break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ - rcu_unexpedite_gp(); + rcu_unexpedite_gp(); break; default: break; -- GitLab From 671a63517cf983ad8eaa324167165cef245ab744 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 19 Jan 2019 11:14:18 -0500 Subject: [PATCH 0197/1861] rcu: Avoid unnecessary softirq when system is idle When there are no callbacks pending on an idle system, I noticed that RCU softirq is continuously firing. During this the cpu_no_qs is set to false, and core_needs_qs is set to true indefinitely. This causes rcu_process_callbacks to be repeatedly called, even though the node corresponding to the CPU has that CPU's mask bit cleared and the system is idle. I believe the race is when such mask clearing is done during idle CPU scan of the quiescent state forcing stage in the kthread instead of the softirq. Since the rnp mask is cleared, but the flags on the CPU's rdp are not cleared, the CPU thinks it still needs to report to core RCU. Cure this by clearing the core_needs_qs flag when the CPU detects that its node is already updated which will avoid the unwanted softirq raises to the benefit of real-time systems. Test: Ran rcutorture for various tree RCU configs. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 95e3250b7b6e9..2f78a115d34cd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2296,6 +2296,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) } mask = rdp->grpmask; if ((rnp->qsmask & mask) == 0) { + rdp->core_needs_qs = false; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } else { rdp->core_needs_qs = false; -- GitLab From 18d7e40679ef574e428f893101be1c0035e95ee3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jan 2019 21:14:37 +0300 Subject: [PATCH 0198/1861] rcu: rcu_qs -- Use raise_softirq_irqoff to not save irqs twice The rcu_qs is disabling IRQs by self so no need to do the same in raise_softirq but instead we can save some cycles using raise_softirq_irqoff directly. CC: Paul E. McKenney Signed-off-by: Cyrill Gorcunov Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tiny.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 911bd9076d435..477b4eb44af5c 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -52,7 +52,7 @@ void rcu_qs(void) local_irq_save(flags); if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) { rcu_ctrlblk.donetail = rcu_ctrlblk.curtail; - raise_softirq(RCU_SOFTIRQ); + raise_softirq_irqoff(RCU_SOFTIRQ); } local_irq_restore(flags); } -- GitLab From 884157cef0acf05648fe921d80c680afababb428 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2019 07:21:29 -0800 Subject: [PATCH 0199/1861] rcu: Make exit_rcu() handle non-preempted RCU readers The purpose of exit_rcu() is to handle cases where buggy code causes a task to exit within an RCU read-side critical section. It currently does that in the case where said RCU read-side critical section was preempted at least once, but fails to handle cases where preemption did not occur. This case needs to be handled because otherwise the final context switch away from the exiting task will incorrectly behave as if task exit were instead a preemption of an RCU read-side critical section, and will therefore queue the exiting task. The exiting task will have exited, and thus won't ever execute rcu_read_unlock(), which means that it will remain queued forever, blocking all subsequent grace periods, and eventually resulting in OOM. Although this is arguably better than letting grace periods proceed and having a later rcu_read_unlock() access the now-freed task structure that once belonged to the exiting task, it would obviously be better to correctly handle this case. This commit therefore sets ->rcu_read_lock_nesting to 1 in that case, so that the subsequent call to __rcu_read_unlock() causes the exiting task to exit its dangling RCU read-side critical section. Note that deferred quiescent states need not be considered. The reason is that removing the task from the ->blkd_tasks[] list in the call to rcu_preempt_deferred_qs() handles the per-task component of any deferred quiescent state, and all other components of any deferred quiescent state are associated with the CPU, which isn't going anywhere until some later CPU-hotplug operation, which will report any remaining deferred quiescent states from within the rcu_report_dead() function. Note also that negative values of ->rcu_read_lock_nesting need not be considered. First, these won't show up in exit_rcu() unless there is a serious bug in RCU, and second, setting ->rcu_read_lock_nesting sets the state so that the RCU read-side critical section will be exited normally. Again, this code has no effect unless there has been some prior bug that prevents a task from leaving an RCU read-side critical section before exiting. Furthermore, there have been no reports of the bug fixed by this commit appearing in production. This commit is therefore absolutely -not- recommended for backporting to -stable. Reported-by: ABHISHEK DUBEY Reported-by: BHARATH Y MOURYA Reported-by: Aravinda Prasad Signed-off-by: Paul E.
McKenney Tested-by: ABHISHEK DUBEY --- kernel/rcu/tree_plugin.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 97dba50f6fb24..d408661d5fb73 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -804,19 +804,25 @@ static void rcu_flavor_sched_clock_irq(int user) /* * Check for a task exiting while in a preemptible-RCU read-side - * critical section, clean up if so. No need to issue warnings, - * as debug_check_no_locks_held() already does this if lockdep - * is enabled. + * critical section, clean up if so. No need to issue warnings, as + * debug_check_no_locks_held() already does this if lockdep is enabled. + * Besides, if this function does anything other than just immediately + * return, there was a bug of some sort. Spewing warnings from this + * function is like as not to simply obscure important prior warnings. */ void exit_rcu(void) { struct task_struct *t = current; - if (likely(list_empty(¤t->rcu_node_entry))) + if (unlikely(!list_empty(¤t->rcu_node_entry))) { + t->rcu_read_lock_nesting = 1; + barrier(); + t->rcu_read_unlock_special.b.blocked = true; + } else if (unlikely(t->rcu_read_lock_nesting)) { + t->rcu_read_lock_nesting = 1; + } else { return; - t->rcu_read_lock_nesting = 1; - barrier(); - t->rcu_read_unlock_special.b.blocked = true; + } __rcu_read_unlock(); rcu_preempt_deferred_qs(current); } -- GitLab From 3ffe3d1adc0b6cfb9b24db9995a537bb0aa30a8b Mon Sep 17 00:00:00 2001 From: Liu Song Date: Thu, 21 Feb 2019 22:13:27 +0800 Subject: [PATCH 0200/1861] rcu: Set rcutree.kthread_prio sysfs access to read-only The rcutree.kthread_prio kernel-boot parameter is used to set the priority for boost (rcub), per-CPU (rcuc), and grace-period (rcu_preempt or rcu_sched) kthreads. It is also used by rcutorture to check whether it is possible to meaningfully test RCU priority boosting. However, all of these cases will either ignore or be confused by any post-boot changes to rcutree.kthread_prio. Note that the user really can change the priorities of all of these kthreads using chrt, given sufficient privileges. Therefore, the read-write nature of sysfs access to rcutree.kthread_prio is thus at best an attractive nuisance. This commit therefore changes sysfs access to rcutree.kthread_prio to be read-only. Signed-off-by: Liu Song Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2f78a115d34cd..296131450414e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -149,7 +149,7 @@ static void sync_sched_exp_online_cleanup(int cpu); /* rcuc/rcub kthread realtime priority */ static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; -module_param(kthread_prio, int, 0644); +module_param(kthread_prio, int, 0444); /* Delay in jiffies for grace-period initialization delays, debug only. */ -- GitLab From bdead419fb1ddd9df99be9f98b40631ae697a623 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Feb 2019 12:31:06 -0800 Subject: [PATCH 0201/1861] MAINTAINERS: RCU now has its own email list This commit makes rcu@vger.kernel.org be the official list for RCU-related topics. Signed-off-by: Paul E. 
McKenney --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b5480..1924b52937a6c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13031,7 +13031,7 @@ M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers R: Lai Jiangshan -L: linux-kernel@vger.kernel.org +L: rcu@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: tools/testing/selftests/rcutorture @@ -13079,7 +13079,7 @@ R: Steven Rostedt R: Mathieu Desnoyers R: Lai Jiangshan R: Joel Fernandes -L: linux-kernel@vger.kernel.org +L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git @@ -14234,7 +14234,7 @@ M: "Paul E. McKenney" M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers -L: linux-kernel@vger.kernel.org +L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git -- GitLab From 9145b505bbbd87c6c8d4854ec411daeb33ed3deb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 16 Mar 2019 16:42:49 -0700 Subject: [PATCH 0202/1861] MAINTAINERS: Add -rcu branch name ("dev") Signed-off-by: Paul E. McKenney --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1924b52937a6c..a9b5270d006e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8983,7 +8983,7 @@ R: Daniel Lustig L: linux-kernel@vger.kernel.org L: linux-arch@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: tools/memory-model/ F: Documentation/atomic_bitops.txt F: Documentation/atomic_t.txt @@ -13033,7 +13033,7 @@ R: Mathieu Desnoyers R: Lai Jiangshan L: rcu@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: tools/testing/selftests/rcutorture RDC R-321X SoC @@ -13082,7 +13082,7 @@ R: Joel Fernandes L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/RCU/ X: Documentation/RCU/torture.txt F: include/linux/rcu* @@ -14237,7 +14237,7 @@ R: Mathieu Desnoyers L: rcu@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: include/linux/srcu*.h F: kernel/rcu/srcu*.c @@ -15684,7 +15684,7 @@ M: "Paul E. McKenney" M: Josh Triplett L: linux-kernel@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/RCU/torture.txt F: kernel/torture.c F: kernel/rcu/rcutorture.c -- GitLab From b2eb85b49a576515fb845cb12568b173c2bedffc Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Sat, 2 Mar 2019 17:25:19 +0900 Subject: [PATCH 0203/1861] rcu: Move common code out of if-else block As the result of recent addition of "rdp->core_needs_qs = false;" in the "if" block, now both branches of the if-else have the same assignment. 
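As a minimal sketch (illustrative only, not the actual tree.c code; unlock_rnp() and report_qs() are placeholder names standing in for the real unlock and quiescent-state-reporting paths), the duplication and its factoring look like this:

	/* Before: both branches of the if-else assign the flag. */
	if ((rnp->qsmask & mask) == 0) {
		rdp->core_needs_qs = false;
		unlock_rnp(rnp);
	} else {
		rdp->core_needs_qs = false;
		report_qs(rnp, mask);
	}

	/* After: hoist the common assignment ahead of the branch. */
	rdp->core_needs_qs = false;
	if ((rnp->qsmask & mask) == 0)
		unlock_rnp(rnp);
	else
		report_qs(rnp, mask);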
Factor it out and reduce line count. Signed-off-by: Akira Yokosawa Cc: Joel Fernandes Signed-off-by: Paul E. McKenney Acked-by: Joel Fernandes (Google) --- kernel/rcu/tree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 296131450414e..5aefd36ac648a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2295,12 +2295,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) return; } mask = rdp->grpmask; + rdp->core_needs_qs = false; if ((rnp->qsmask & mask) == 0) { - rdp->core_needs_qs = false; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } else { - rdp->core_needs_qs = false; - /* * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. -- GitLab From da8739f23fadf05809c6c37c327367b229467045 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Mar 2019 15:28:19 -0800 Subject: [PATCH 0204/1861] rcu: Allow rcu_nocbs= to specify all CPUs Currently, the rcu_nocbs= kernel boot parameter requires that a specific list of CPUs be specified, and has no way to say "all of them". As noted by user RavFX in a comment to Phoronix topic 1002538, this is an inconvenient side effect of the removal of the RCU_NOCB_CPU_ALL Kconfig option. This commit therefore enables the rcu_nocbs= kernel boot parameter to be given the string "all", as in "rcu_nocbs=all" to specify that all CPUs on the system are to have their RCU callbacks offloaded. Another approach would be to make cpulist_parse() check for "all", but there are uses of cpulist_parse() that do other checking, which could conflict with an "all". This commit therefore focuses on the specific use of cpulist_parse() in rcu_nocb_setup(). Just a note to other people who would like changes to Linux-kernel RCU: If you send your requests to me directly, they might get fixed somewhat faster. RavFX's comment was posted on January 22, 2018 and I first saw it on March 5, 2019. And the only reason that I found it -at- -all- was that I was looking for projects using RCU, and my search engine showed me that Phoronix comment quite by accident. Your choice, though! ;-) Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 4 +++- kernel/rcu/tree_plugin.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b8ee90bb6447..d377a2166b79c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3623,7 +3623,9 @@ see CONFIG_RAS_CEC help text. rcu_nocbs= [KNL] - The argument is a cpu list, as described above. + The argument is a cpu list, as described above, + except that the string "all" can be used to + specify every CPU on the system. In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. 
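For illustration, these kernel command-line fragments (examples only, not part of the patch) show both accepted forms:

	rcu_nocbs=all		# offload RCU callbacks from every CPU
	rcu_nocbs=1-7,10	# offload only CPUs 1-7 and 10, as before

The "all" form is equivalent to listing every CPU explicitly, but does not require knowing the CPU count of the target machine in advance.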
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index d408661d5fb73..ed4a6dabf31d0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1776,7 +1776,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); - cpulist_parse(str, rcu_nocb_mask); + if (!strcasecmp(str, "all")) + cpumask_setall(rcu_nocb_mask); + else + cpulist_parse(str, rcu_nocb_mask); return 1; } __setup("rcu_nocbs=", rcu_nocb_setup); -- GitLab From 497e42600b69aca1b799c840d2cfc7ad60bb8017 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Mar 2019 14:47:56 -0800 Subject: [PATCH 0205/1861] rcu: Report error for bad rcu_nocbs= parameter values This commit prints a console message when cpulist_parse() reports a bad list of CPUs, and sets all CPUs' bits in that case. The reason for setting all CPUs' bits is that this is the safe(r) choice for real-time workloads, which would normally be the ones using the rcu_nocbs= kernel boot parameter. Either way, later RCU console log messages list the actual set of CPUs whose RCU callbacks will be offloaded. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index ed4a6dabf31d0..f0aeb7416dccf 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1772,14 +1772,22 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) */ -/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */ +/* + * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. + * The string after the "rcu_nocbs=" is either "all" for all CPUs, or a + * comma-separated list of CPUs and/or CPU ranges. If an invalid list is + * given, a warning is emitted and all CPUs are offloaded. + */ static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); if (!strcasecmp(str, "all")) cpumask_setall(rcu_nocb_mask); else - cpulist_parse(str, rcu_nocb_mask); + if (cpulist_parse(str, rcu_nocb_mask)) { + pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n"); + cpumask_setall(rcu_nocb_mask); + } return 1; } __setup("rcu_nocbs=", rcu_nocb_setup); -- GitLab From 0f58d2ac2c87d27006c2b610668ebc4ff1f7c3ba Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 8 Mar 2019 15:16:18 +0530 Subject: [PATCH 0206/1861] rcu: Fix self-wakeups for grace-period kthread The current rcu_gp_kthread_wake() function uses in_interrupt() and thus does a self-wakeup from all interrupt contexts, including the pointless case where the GP kthread happens to be running with bottom halves disabled, along with the impossible case where the GP kthread is running within an NMI handler (you are not supposed to invoke rcu_gp_kthread_wake() from within an NMI handler). This commit therefore replaces the in_interrupt() with in_irq(), so that the self-wakeups happen only from handlers for hardware interrupts and softirqs. This also makes the code match the comment. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E.
McKenney Acked-by: Steven Rostedt (VMware) --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5aefd36ac648a..139fa1f5c5378 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1585,7 +1585,7 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) static void rcu_gp_kthread_wake(void) { if ((current == rcu_state.gp_kthread && - !in_interrupt() && !in_serving_softirq()) || + !in_irq() && !in_serving_softirq()) || !READ_ONCE(rcu_state.gp_flags) || !rcu_state.gp_kthread) return; -- GitLab From 6973032a602ee678c98644a30d57ebf9c72dd6d3 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 11 Mar 2019 15:16:11 +0530 Subject: [PATCH 0207/1861] rcu: Default jiffies_to_sched_qs to jiffies_till_sched_qs The current code only calls adjust_jiffies_till_sched_qs() if jiffies_till_sched_qs is left at its default value, so when the jiffies_till_sched_qs kernel-boot parameter actually is specified, jiffies_to_sched_qs will be left with the value zero, which will result in useless slowdowns of cond_resched(). This commit therefore changes rcu_init_geometry() to unconditionally invoke adjust_jiffies_till_sched_qs(), which ensures that jiffies_to_sched_qs will be initialized in all cases, thus maintaining good cond_resched() performance. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 139fa1f5c5378..466299c3d2da6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3739,8 +3739,7 @@ static void __init rcu_init_geometry(void) jiffies_till_first_fqs = d; if (jiffies_till_next_fqs == ULONG_MAX) jiffies_till_next_fqs = d; - if (jiffies_till_sched_qs == ULONG_MAX) - adjust_jiffies_till_sched_qs(); + adjust_jiffies_till_sched_qs(); /* If the compile-time values are accurate, just leave. */ if (rcu_fanout_leaf == RCU_FANOUT_LEAF && -- GitLab From b699cce1604e828f19c39845252626eb78cdf38a Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 11 Mar 2019 17:28:03 +0530 Subject: [PATCH 0208/1861] rcu: Do a single rhp->func read in rcu_head_after_call_rcu() The rcu_head_after_call_rcu() function reads the rhp->func pointer twice, which can result in a false-positive WARN_ON_ONCE() if the callback were passed to call_rcu() between the two reads. Although racing rcu_head_after_call_rcu() with call_rcu() is a dubious use case (the return value is not reliable in that case), intermittent and irreproducible warnings are also quite dubious. This commit therefore uses a single READ_ONCE() to pick up the value of rhp->func once, then tests that value twice, thus guaranteeing consistent processing within rcu_head_after_call_rcu(). Nevertheless, racing rcu_head_after_call_rcu() with call_rcu() is still a dubious use case. Signed-off-by: Neeraj Upadhyay [ paulmck: Add blank line after declaration per checkpatch.pl. ] Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdb1db776cf9..922bb68488133 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -878,9 +878,11 @@ static inline void rcu_head_init(struct rcu_head *rhp) static inline bool rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) { - if (READ_ONCE(rhp->func) == f) + rcu_callback_t func = READ_ONCE(rhp->func); + + if (func == f) return true; - WARN_ON_ONCE(READ_ONCE(rhp->func) != (rcu_callback_t)~0L); + WARN_ON_ONCE(func != (rcu_callback_t)~0L); return false; } -- GitLab From 85f2b60c4321b088ba08ec9a05b8a7b68e4ada2a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Mar 2019 15:45:13 -0700 Subject: [PATCH 0209/1861] rcu: Update jiffies_to_sched_qs and adjust_jiffies_till_sched_qs() comments This commit better documents the jiffies_to_sched_qs default-value strategy used by adjust_jiffies_till_sched_qs(). Reported-by: Joel Fernandes Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 466299c3d2da6..e117732bcd5d7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -406,7 +406,7 @@ static bool rcu_kick_kthreads; */ static ulong jiffies_till_sched_qs = ULONG_MAX; module_param(jiffies_till_sched_qs, ulong, 0444); -static ulong jiffies_to_sched_qs; /* Adjusted version of above if not default */ +static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */ module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */ /* @@ -424,6 +424,7 @@ static void adjust_jiffies_till_sched_qs(void) WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs); return; } + /* Otherwise, set to third fqs scan, but bound below on large system. */ j = READ_ONCE(jiffies_till_first_fqs) + 2 * READ_ONCE(jiffies_till_next_fqs); if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV) -- GitLab From 5d8a752e31aaa4c9703f201956a40b45ed791217 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 20 Mar 2019 03:33:00 +0000 Subject: [PATCH 0210/1861] rcu: Fix force_qs_rnp() header comment Previously, threads blocked on offlining CPUs were migrated to the root rcu_node structure, thus requiring RCU priority boosting on this structure. However, since commit d19fb8d1f3f6 ("rcu: Don't migrate blocked tasks even if all corresponding CPUs offline"), RCU does not migrate blocked tasks. Consequently, RCU no longer does RCU priority boosting on the root rcu_node structure as of commit 1be0085b515e ("rcu: Don't initiate RCU priority boosting on root rcu_node"). This commit therefore brings the force_qs_rnp() function's header comment in line with this new no-root-boosting reality. Signed-off-by: Zhouyi Zhou [ paulmck: Also remove obsolete comment on suppressing new grace periods. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e117732bcd5d7..abc8512ceb5f4 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2548,11 +2548,11 @@ void rcu_sched_clock_irq(int user) } /* - * Scan the leaf rcu_node structures, processing dyntick state for any that - * have not yet encountered a quiescent state, using the function specified. - * Also initiate boosting for any threads blocked on the root rcu_node. - * - * The caller must have suppressed start of new grace periods.
+ * Scan the leaf rcu_node structures. For each structure on which all + * CPUs have reported a quiescent state and on which there are tasks + * blocking the current grace period, initiate RCU priority boosting. + * Otherwise, invoke the specified function to check dyntick state for + * each CPU that has not yet reported a quiescent state. */ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) { -- GitLab From a2badefa8574a7a424a72f67a3bd43248141f76a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 16:29:50 -0700 Subject: [PATCH 0211/1861] rcu: Eliminate redundant NULL-pointer check Because rcu_wake_cond() checks for a null task_struct pointer, there is no need for its callers to do so. This commit eliminates the redundant check. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f0aeb7416dccf..81d3cd821891b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1191,8 +1191,6 @@ static int rcu_boost_kthread(void *arg) static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { - struct task_struct *t; - raw_lockdep_assert_held_rcu_node(rnp); if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -1206,9 +1204,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) if (rnp->exp_tasks == NULL) rnp->boost_tasks = rnp->gp_tasks; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - t = rnp->boost_kthread_task; - if (t) - rcu_wake_cond(t, rnp->boost_kthread_status); + rcu_wake_cond(rnp->boost_kthread_task, + rnp->boost_kthread_status); } else { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } -- GitLab From f1a98045abd824df833354b309b5fa64ff95f792 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 23 Mar 2019 09:19:12 -0700 Subject: [PATCH 0212/1861] rcu: Fix typo in tree_exp.h comment This commit changes a rcu_exp_handler() comment from rcu_preempt_defer_qs() to rcu_preempt_deferred_qs() in order to better match reality. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 4c2a0189e7489..ec4fb93a5dbe6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -648,7 +648,7 @@ static void rcu_exp_handler(void *unused) * * If the CPU is fully enabled (or if some buggy RCU-preempt * read-side critical section is being used from idle), just - * invoke rcu_preempt_defer_qs() to immediately report the + * invoke rcu_preempt_deferred_qs() to immediately report the * quiescent state. We cannot use rcu_read_unlock_special() * because we are in an interrupt handler, which will cause that * function to take an early exit without doing anything. -- GitLab From add0d37b4f1e77de7d170ece43c8d765572a1eab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Mar 2019 10:22:22 -0700 Subject: [PATCH 0213/1861] rcu: Correct READ_ONCE()/WRITE_ONCE() for ->rcu_read_unlock_special The task_struct structure's ->rcu_read_unlock_special field is only ever read or written by the owning task, but it is accessed both at process and interrupt levels. It may therefore be accessed using plain reads and writes while interrupts are disabled, but must be accessed using READ_ONCE() and WRITE_ONCE() or better otherwise. 
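As a brief sketch of that access discipline (illustrative only; handle_qs() is a placeholder, not a kernel function):

	/* Interrupts disabled: the owning task cannot race with its own
	 * interrupt handlers, so plain accesses suffice. */
	local_irq_save(flags);
	current->rcu_read_unlock_special.b.need_qs = false;
	local_irq_restore(flags);

	/* Interrupts enabled: an interrupt on this CPU may also access
	 * the field, so marked accesses are needed to prevent load/store
	 * tearing and surprising compiler optimizations. */
	WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
	if (READ_ONCE(current->rcu_read_unlock_special.b.need_qs))
		handle_qs();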
This commit makes a few adjustments to align with this discipline. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index ec4fb93a5dbe6..1ee0782213b82 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -633,7 +633,7 @@ static void rcu_exp_handler(void *unused) raw_spin_lock_irqsave_rcu_node(rnp, flags); if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; - WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + t->rcu_read_unlock_special.b.exp_hint = true; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 81d3cd821891b..6ddb3c05e88f0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -285,7 +285,7 @@ static void rcu_qs(void) TPS("cpuqs")); __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false); barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */ - current->rcu_read_unlock_special.b.need_qs = false; + WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false); } } @@ -817,7 +817,7 @@ void exit_rcu(void) if (unlikely(!list_empty(¤t->rcu_node_entry))) { t->rcu_read_lock_nesting = 1; barrier(); - t->rcu_read_unlock_special.b.blocked = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true); } else if (unlikely(t->rcu_read_lock_nesting)) { t->rcu_read_lock_nesting = 1; } else { -- GitLab From 5cdfd174ea6c2dc1d331b61bdc9572698658600a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Feb 2019 10:44:33 -0800 Subject: [PATCH 0214/1861] srcu: Check for in-flight callbacks in _cleanup_srcu_struct() If someone fails to drain the corresponding SRCU callbacks (for example, by failing to invoke srcu_barrier()) before invoking either cleanup_srcu_struct() or cleanup_srcu_struct_quiesced(), the resulting diagnostic is an ambiguous use-after-free diagnostic, and even then only if you are running something like KASAN. This commit therefore improves SRCU diagnostics by adding checks for in-flight callbacks at _cleanup_srcu_struct() time. Note that these diagnostics can still be defeated, for example, by invoking call_srcu() concurrently with cleanup_srcu_struct(). Which is a really bad idea, but sometimes all too easy to do. But even then, these diagnostics have at least some probability of catching the problem. Reported-by: Sagi Grimberg Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Tested-by: Bart Van Assche --- kernel/rcu/srcutree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a60b8ba9e1aca..4f30f3ecabc1f 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -387,6 +387,8 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) del_timer_sync(&sdp->delay_work); flush_work(&sdp->work); } + if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) + return; /* Forgot srcu_barrier(), so just leak it! */ } if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(ssp))) { -- GitLab From f5ad3991493c69d203d42b94d32349b54c58a3f1 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 13 Feb 2019 13:54:37 -0800 Subject: [PATCH 0215/1861] srcu: Remove cleanup_srcu_struct_quiesced() The cleanup_srcu_struct_quiesced() function was added because NVME used WQ_MEM_RECLAIM workqueues and SRCU did not, which meant that NVME workqueues waiting on SRCU workqueues could result in deadlocks during low-memory conditions. However, SRCU now also has WQ_MEM_RECLAIM workqueues, so there is no longer a potential for deadlock. Furthermore, it turns out to be extremely hard to use cleanup_srcu_struct_quiesced() correctly due to the fact that SRCU callback invocation accesses the srcu_struct structure's per-CPU data area just after callbacks are invoked. Therefore, the usual practice of using srcu_barrier() to wait for callbacks to be invoked before invoking cleanup_srcu_struct_quiesced() fails because SRCU's callback-invocation workqueue handler might be delayed, which can result in cleanup_srcu_struct_quiesced() being invoked (and thus freeing the per-CPU data) before the SRCU's callback-invocation workqueue handler is finished using that per-CPU data. Nor is this a theoretical problem: KASAN emitted use-after-free warnings because of this problem on actual runs. In short, NVME can now safely invoke cleanup_srcu_struct(), which avoids the use-after-free scenario. And cleanup_srcu_struct_quiesced() is quite difficult to use safely. This commit therefore removes cleanup_srcu_struct_quiesced(), switching its sole user back to cleanup_srcu_struct(). This effectively reverts the following pair of commits: f7194ac32ca2 ("srcu: Add cleanup_srcu_struct_quiesced()") 4317228ad9b8 ("nvme: Avoid flush dependency in delete controller flow") Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Reviewed-by: Bart Van Assche Tested-by: Bart Van Assche --- drivers/nvme/host/core.c | 2 +- include/linux/srcu.h | 36 +----------------------------------- kernel/rcu/rcutorture.c | 7 +------ kernel/rcu/srcutiny.c | 9 +++------ kernel/rcu/srcutree.c | 30 ++++++++++++------------------ 5 files changed, 18 insertions(+), 66 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 470601980794e..739c5b4830d70 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -388,7 +388,7 @@ static void nvme_free_ns_head(struct kref *ref) nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); - cleanup_srcu_struct_quiesced(&head->srcu); + cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); } diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c495b2d51569e..e432cc92c73de 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -56,45 +56,11 @@ struct srcu_struct { }; void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); +void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); -/** - * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @ssp: structure to clean up. - * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. 
- */ -static inline void cleanup_srcu_struct(struct srcu_struct *ssp) -{ - _cleanup_srcu_struct(ssp, false); -} - -/** - * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure - * @ssp: structure to clean up. - * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. Also, - * all grace-period processing must have completed. - * - * "Completed" means that the last synchronize_srcu() and - * synchronize_srcu_expedited() calls must have returned before the call - * to cleanup_srcu_struct_quiesced(). It also means that the callback - * from the last call_srcu() must have been invoked before the call to - * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help - * with this last. Violating these rules will get you a WARN_ON() splat - * (with high probability, anyway), and will also cause the srcu_struct - * to be leaked. - */ -static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp) -{ - _cleanup_srcu_struct(ssp, true); -} - #ifdef CONFIG_DEBUG_LOCK_ALLOC /** diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index f14d1b18a74fc..d2b226110835b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -592,12 +592,7 @@ static void srcu_torture_init(void) static void srcu_torture_cleanup(void) { - static DEFINE_TORTURE_RANDOM(rand); - - if (torture_random(&rand) & 0x800) - cleanup_srcu_struct(&srcu_ctld); - else - cleanup_srcu_struct_quiesced(&srcu_ctld); + cleanup_srcu_struct(&srcu_ctld); srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ } diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 5d4a39a6505a4..44d6606b83257 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -76,19 +76,16 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) +void cleanup_srcu_struct(struct srcu_struct *ssp) { WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); - if (quiesced) - WARN_ON(work_pending(&ssp->srcu_work)); - else - flush_work(&ssp->srcu_work); + flush_work(&ssp->srcu_work); WARN_ON(ssp->srcu_gp_running); WARN_ON(ssp->srcu_gp_waiting); WARN_ON(ssp->srcu_cb_head); WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); } -EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Removes the count for the old reader from the appropriate element of diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 4f30f3ecabc1f..9b761e546de8c 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -360,8 +360,14 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) return SRCU_INTERVAL; } -/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ -void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @ssp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void cleanup_srcu_struct(struct srcu_struct *ssp) { int cpu; @@ -369,24 +375,12 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! 
*/ - if (quiesced) { - if (WARN_ON(delayed_work_pending(&ssp->work))) - return; /* Just leak it! */ - } else { - flush_delayed_work(&ssp->work); - } + flush_delayed_work(&ssp->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); - if (quiesced) { - if (WARN_ON(timer_pending(&sdp->delay_work))) - return; /* Just leak it! */ - if (WARN_ON(work_pending(&sdp->work))) - return; /* Just leak it! */ - } else { - del_timer_sync(&sdp->delay_work); - flush_work(&sdp->work); - } + del_timer_sync(&sdp->delay_work); + flush_work(&sdp->work); if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) return; /* Forgot srcu_barrier(), so just leak it! */ } @@ -399,7 +393,7 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) free_percpu(ssp->sda); ssp->sda = NULL; } -EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the -- GitLab From 2e018c59fe8fbc4ae1a8db1523ea0f6159f48b4b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Mar 2019 13:34:01 -0500 Subject: [PATCH 0216/1861] ACPI / tables: Clean up whitespace Cleanup some whitespace to match the rest of the file. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/tables.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 8fccbe49612a0..4672f9ef183aa 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -240,8 +240,7 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) * On success returns sum of all matching entries for all proc handlers. * Otherwise, -ENODEV or -EINVAL is returned. */ -static int __init -acpi_parse_entries_array(char *id, unsigned long table_size, +static int __init acpi_parse_entries_array(char *id, unsigned long table_size, struct acpi_table_header *table_header, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) @@ -314,8 +313,7 @@ acpi_parse_entries_array(char *id, unsigned long table_size, return errs ? 
-EINVAL : count; } -int __init -acpi_table_parse_entries_array(char *id, +int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) @@ -346,8 +344,7 @@ acpi_table_parse_entries_array(char *id, return count; } -int __init -acpi_table_parse_entries(char *id, +int __init acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, @@ -362,8 +359,7 @@ acpi_table_parse_entries(char *id, max_entries); } -int __init -acpi_table_parse_madt(enum acpi_madt_type id, +int __init acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries) { return acpi_table_parse_entries(ACPI_SIG_MADT, @@ -725,8 +721,7 @@ static void *amlcode __attribute__ ((weakref("AmlCode"))); static void *dsdt_amlcode __attribute__ ((weakref("dsdt_aml_code"))); #endif -acpi_status -acpi_os_table_override(struct acpi_table_header *existing_table, +acpi_status acpi_os_table_override(struct acpi_table_header *existing_table, struct acpi_table_header **new_table) { if (!existing_table || !new_table) @@ -788,7 +783,6 @@ static int __init acpi_parse_apic_instance(char *str) return 0; } - early_param("acpi_apic_instance", acpi_parse_apic_instance); static int __init acpi_force_table_verification_setup(char *s) @@ -797,7 +791,6 @@ static int __init acpi_force_table_verification_setup(char *s) return 0; } - early_param("acpi_force_table_verification", acpi_force_table_verification_setup); static int __init acpi_force_32bit_fadt_addr(char *s) @@ -807,5 +800,4 @@ static int __init acpi_force_32bit_fadt_addr(char *s) return 0; } - early_param("acpi_force_32bit_fadt_addr", acpi_force_32bit_fadt_addr); -- GitLab From 10462d6f58fb6dbde7563e9343505d98d5bfba3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 16:10:57 -0800 Subject: [PATCH 0217/1861] rcu: Move RCU CPU stall-warning code out of update.c The RCU CPU stall-warning code for normal grace periods is currently scattered across three files, due to earlier Tiny RCU support for RCU CPU stall warnings and for old Kconfig options that have long since been retired. Given that it is hard for the lead RCU maintainer to find relevant stall-warning code, it would be good to consolidate it. This commit starts this process by moving stall-warning code from kernel/rcu/update.c to a new kernel/rcu/tree_stall.h file. Note that the definitions of rcu_cpu_stall_suppress and rcu_cpu_stall_timeout must remain in kernel/rcu/update.c to provide compatibility for kernel boot parameter lists. Signed-off-by: Paul E.
McKenney --- kernel/rcu/rcu.h | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_stall.h | 63 +++++++++++++++++++++++++++++++++++++++++ kernel/rcu/update.c | 59 +------------------------------------- 4 files changed, 66 insertions(+), 58 deletions(-) create mode 100644 kernel/rcu/tree_stall.h diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index acee72c0b24b5..4b58c907b4b7f 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -233,6 +233,7 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) #ifdef CONFIG_RCU_STALL_COMMON extern int rcu_cpu_stall_suppress; +extern int rcu_cpu_stall_timeout; int rcu_jiffies_till_stall_check(void); #define rcu_ftrace_dump_stall_suppress() \ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf9..424d50ccf9e62 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3858,5 +3858,6 @@ void __init rcu_init(void) srcu_init(); } +#include "tree_stall.h" #include "tree_exp.h" #include "tree_plugin.h" diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h new file mode 100644 index 0000000000000..682189f4d0832 --- /dev/null +++ b/kernel/rcu/tree_stall.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * RCU CPU stall warnings for normal RCU grace periods + * + * Copyright IBM Corporation, 2019 + * + * Author: Paul E. McKenney + */ + + +#ifdef CONFIG_PROVE_RCU +#define RCU_STALL_DELAY_DELTA (5 * HZ) +#else +#define RCU_STALL_DELAY_DELTA 0 +#endif + +int rcu_jiffies_till_stall_check(void) +{ + int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); + + /* + * Limit check must be consistent with the Kconfig limits + * for CONFIG_RCU_CPU_STALL_TIMEOUT. + */ + if (till_stall_check < 3) { + WRITE_ONCE(rcu_cpu_stall_timeout, 3); + till_stall_check = 3; + } else if (till_stall_check > 300) { + WRITE_ONCE(rcu_cpu_stall_timeout, 300); + till_stall_check = 300; + } + return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; +} +EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); + +void rcu_sysrq_start(void) +{ + if (!rcu_cpu_stall_suppress) + rcu_cpu_stall_suppress = 2; +} + +void rcu_sysrq_end(void) +{ + if (rcu_cpu_stall_suppress == 2) + rcu_cpu_stall_suppress = 0; +} + +static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) +{ + rcu_cpu_stall_suppress = 1; + return NOTIFY_DONE; +} + +static struct notifier_block rcu_panic_block = { + .notifier_call = rcu_panic, +}; + +static int __init check_cpu_stall_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); + return 0; +} +early_initcall(check_cpu_stall_init); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index cbaa976c59451..c3bf44ba42e54 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -424,68 +424,11 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #endif #ifdef CONFIG_RCU_STALL_COMMON - -#ifdef CONFIG_PROVE_RCU -#define RCU_STALL_DELAY_DELTA (5 * HZ) -#else -#define RCU_STALL_DELAY_DELTA 0 -#endif - int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress); -static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; - module_param(rcu_cpu_stall_suppress, int, 0644); +int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_timeout, int, 0644); - -int rcu_jiffies_till_stall_check(void) -{ - int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); - - /* - * Limit check must be consistent with the Kconfig limits - * for CONFIG_RCU_CPU_STALL_TIMEOUT. 
- */ - if (till_stall_check < 3) { - WRITE_ONCE(rcu_cpu_stall_timeout, 3); - till_stall_check = 3; - } else if (till_stall_check > 300) { - WRITE_ONCE(rcu_cpu_stall_timeout, 300); - till_stall_check = 300; - } - return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; -} -EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); - -void rcu_sysrq_start(void) -{ - if (!rcu_cpu_stall_suppress) - rcu_cpu_stall_suppress = 2; -} - -void rcu_sysrq_end(void) -{ - if (rcu_cpu_stall_suppress == 2) - rcu_cpu_stall_suppress = 0; -} - -static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) -{ - rcu_cpu_stall_suppress = 1; - return NOTIFY_DONE; -} - -static struct notifier_block rcu_panic_block = { - .notifier_call = rcu_panic, -}; - -static int __init check_cpu_stall_init(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); - return 0; -} -early_initcall(check_cpu_stall_init); - #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ #ifdef CONFIG_TASKS_RCU -- GitLab From 3fc3d1709fc75995ee09ad4f35f160cf360b397b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 16:34:47 -0800 Subject: [PATCH 0218/1861] rcu: Move RCU CPU stall-warning code out of tree_plugin.h The RCU CPU stall-warning code for normal grace periods is currently scattered across two files, due to earlier Tiny RCU support for RCU CPU stall warnings and for old Kconfig options that have long since been retired. Given that it is hard for the lead RCU maintainer to find relevant stall-warning code, it would be good to consolidate it. This commit continues this process by moving stall-warning code from kernel/rcu/tree_plugin.c to a new kernel/rcu/tree_stall.h file. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 90 ------------------------------------- kernel/rcu/tree_stall.h | 95 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 90 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 97dba50f6fb24..7fa3bc4d481bf 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -642,79 +642,6 @@ static void rcu_read_unlock_special(struct task_struct *t) rcu_preempt_deferred_qs_irqrestore(t, flags); } -/* - * Dump detailed information for all tasks blocking the current RCU - * grace period on the specified rcu_node structure. - */ -static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) -{ - unsigned long flags; - struct task_struct *t; - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (!rcu_preempt_blocked_readers_cgp(rnp)) { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - return; - } - t = list_entry(rnp->gp_tasks->prev, - struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { - /* - * We could be printing a lot while holding a spinlock. - * Avoid triggering hard lockup. - */ - touch_nmi_watchdog(); - sched_show_task(t); - } - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); -} - -/* - * Dump detailed information for all tasks blocking the current RCU - * grace period. 
- */ -static void rcu_print_detail_task_stall(void) -{ - struct rcu_node *rnp = rcu_get_root(); - - rcu_print_detail_task_stall_rnp(rnp); - rcu_for_each_leaf_node(rnp) - rcu_print_detail_task_stall_rnp(rnp); -} - -static void rcu_print_task_stall_begin(struct rcu_node *rnp) -{ - pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", - rnp->level, rnp->grplo, rnp->grphi); -} - -static void rcu_print_task_stall_end(void) -{ - pr_cont("\n"); -} - -/* - * Scan the current list of tasks blocked within RCU read-side critical - * sections, printing out the tid of each. - */ -static int rcu_print_task_stall(struct rcu_node *rnp) -{ - struct task_struct *t; - int ndetected = 0; - - if (!rcu_preempt_blocked_readers_cgp(rnp)) - return 0; - rcu_print_task_stall_begin(rnp); - t = list_entry(rnp->gp_tasks->prev, - struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { - pr_cont(" P%d", t->pid); - ndetected++; - } - rcu_print_task_stall_end(); - return ndetected; -} - /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each that is blocking the current @@ -979,23 +906,6 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) } static void rcu_preempt_deferred_qs(struct task_struct *t) { } -/* - * Because preemptible RCU does not exist, we never have to check for - * tasks blocked within RCU read-side critical sections. - */ -static void rcu_print_detail_task_stall(void) -{ -} - -/* - * Because preemptible RCU does not exist, we never have to check for - * tasks blocked within RCU read-side critical sections. - */ -static int rcu_print_task_stall(struct rcu_node *rnp) -{ - return 0; -} - /* * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections that are diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 682189f4d0832..6f5f94944f49e 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -61,3 +61,98 @@ static int __init check_cpu_stall_init(void) return 0; } early_initcall(check_cpu_stall_init); + +#ifdef CONFIG_PREEMPT + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period on the specified rcu_node structure. + */ +static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) +{ + unsigned long flags; + struct task_struct *t; + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + t = list_entry(rnp->gp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + /* + * We could be printing a lot while holding a spinlock. + * Avoid triggering hard lockup. + */ + touch_nmi_watchdog(); + sched_show_task(t); + } + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +} + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period. 
+ */ +static void rcu_print_detail_task_stall(void) +{ + struct rcu_node *rnp = rcu_get_root(); + + rcu_print_detail_task_stall_rnp(rnp); + rcu_for_each_leaf_node(rnp) + rcu_print_detail_task_stall_rnp(rnp); +} + +static void rcu_print_task_stall_begin(struct rcu_node *rnp) +{ + pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", + rnp->level, rnp->grplo, rnp->grphi); +} + +static void rcu_print_task_stall_end(void) +{ + pr_cont("\n"); +} + +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each. + */ +static int rcu_print_task_stall(struct rcu_node *rnp) +{ + struct task_struct *t; + int ndetected = 0; + + if (!rcu_preempt_blocked_readers_cgp(rnp)) + return 0; + rcu_print_task_stall_begin(rnp); + t = list_entry(rnp->gp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + pr_cont(" P%d", t->pid); + ndetected++; + } + rcu_print_task_stall_end(); + return ndetected; +} + +#else /* #ifdef CONFIG_PREEMPT */ + +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static void rcu_print_detail_task_stall(void) +{ +} + +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static int rcu_print_task_stall(struct rcu_node *rnp) +{ + return 0; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ -- GitLab From 32255d51b6ed00de2b88970ceea8db0ec3bae6f8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 16:57:41 -0800 Subject: [PATCH 0219/1861] rcu: Move RCU CPU stall-warning code out of tree.c This commit completes the process of consolidating the code for RCU CPU stall warnings for normal grace periods by moving the remaining such code from kernel/rcu/tree.c to kernel/rcu/tree_stall.h. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 291 --------------------------------------- kernel/rcu/tree.h | 10 +- kernel/rcu/tree_stall.h | 292 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 299 insertions(+), 294 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 424d50ccf9e62..001dd05f6e385 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -102,8 +102,6 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; /* Number of rcu_nodes at specified level. */ int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ -/* panic() on RCU Stall sysctl. */ -int sysctl_panic_on_rcu_stall __read_mostly; /* Commandeer a sysrq key to dump RCU's tree. */ static bool sysrq_rcu; module_param(sysrq_rcu, bool, 0444); @@ -1167,295 +1165,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 0; } -static void record_gp_stall_check_time(void) -{ - unsigned long j = jiffies; - unsigned long j1; - - rcu_state.gp_start = j; - j1 = rcu_jiffies_till_stall_check(); - /* Record ->gp_start before ->jiffies_stall. */ - smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ - rcu_state.jiffies_resched = j + j1 / 2; - rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); -} - -/* - * Complain about starvation of grace-period kthread. - */ -static void rcu_check_gp_kthread_starvation(void) -{ - struct task_struct *gpk = rcu_state.gp_kthread; - unsigned long j; - - j = jiffies - READ_ONCE(rcu_state.gp_activity); - if (j > 2 * HZ) { - pr_err("%s kthread starved for %ld jiffies! 
g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", - rcu_state.name, j, - (long)rcu_seq_current(&rcu_state.gp_seq), - READ_ONCE(rcu_state.gp_flags), - gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); - if (gpk) { - pr_err("RCU grace-period kthread stack dump:\n"); - sched_show_task(gpk); - wake_up_process(gpk); - } - } -} - -/* - * Dump stacks of all tasks running on stalled CPUs. First try using - * NMIs, but fall back to manual remote stack tracing on architectures - * that don't support NMI-based stack dumps. The NMI-triggered stack - * traces are more accurate because they are printed by the target CPU. - */ -static void rcu_dump_cpu_stacks(void) -{ - int cpu; - unsigned long flags; - struct rcu_node *rnp; - - rcu_for_each_leaf_node(rnp) { - raw_spin_lock_irqsave_rcu_node(rnp, flags); - for_each_leaf_node_possible_cpu(rnp, cpu) - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - } -} - -/* - * If too much time has passed in the current grace period, and if - * so configured, go kick the relevant kthreads. - */ -static void rcu_stall_kick_kthreads(void) -{ - unsigned long j; - - if (!rcu_kick_kthreads) - return; - j = READ_ONCE(rcu_state.jiffies_kick_kthreads); - if (time_after(jiffies, j) && rcu_state.gp_kthread && - (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { - WARN_ONCE(1, "Kicking %s grace-period kthread\n", - rcu_state.name); - rcu_ftrace_dump(DUMP_ALL); - wake_up_process(rcu_state.gp_kthread); - WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); - } -} - -static void panic_on_rcu_stall(void) -{ - if (sysctl_panic_on_rcu_stall) - panic("RCU Stall\n"); -} - -static void print_other_cpu_stall(unsigned long gp_seq) -{ - int cpu; - unsigned long flags; - unsigned long gpa; - unsigned long j; - int ndetected = 0; - struct rcu_node *rnp = rcu_get_root(); - long totqlen = 0; - - /* Kick and suppress, if so configured. */ - rcu_stall_kick_kthreads(); - if (rcu_cpu_stall_suppress) - return; - - /* - * OK, time to rat on our buddy... - * See Documentation/RCU/stallwarn.txt for info on how to debug - * RCU CPU stall warnings. - */ - pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name); - print_cpu_stall_info_begin(); - rcu_for_each_leaf_node(rnp) { - raw_spin_lock_irqsave_rcu_node(rnp, flags); - ndetected += rcu_print_task_stall(rnp); - if (rnp->qsmask != 0) { - for_each_leaf_node_possible_cpu(rnp, cpu) - if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { - print_cpu_stall_info(cpu); - ndetected++; - } - } - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - } - - print_cpu_stall_info_end(); - for_each_possible_cpu(cpu) - totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", - smp_processor_id(), (long)(jiffies - rcu_state.gp_start), - (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); - if (ndetected) { - rcu_dump_cpu_stacks(); - - /* Complain about tasks blocking the grace period. 
*/ - rcu_print_detail_task_stall(); - } else { - if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { - pr_err("INFO: Stall ended before state dump start\n"); - } else { - j = jiffies; - gpa = READ_ONCE(rcu_state.gp_activity); - pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", - rcu_state.name, j - gpa, j, gpa, - READ_ONCE(jiffies_till_next_fqs), - rcu_get_root()->qsmask); - /* In this case, the current CPU might be at fault. */ - sched_show_task(current); - } - } - /* Rewrite if needed in case of slow consoles. */ - if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) - WRITE_ONCE(rcu_state.jiffies_stall, - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); - - rcu_check_gp_kthread_starvation(); - - panic_on_rcu_stall(); - - rcu_force_quiescent_state(); /* Kick them all. */ -} - -static void print_cpu_stall(void) -{ - int cpu; - unsigned long flags; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - struct rcu_node *rnp = rcu_get_root(); - long totqlen = 0; - - /* Kick and suppress, if so configured. */ - rcu_stall_kick_kthreads(); - if (rcu_cpu_stall_suppress) - return; - - /* - * OK, time to rat on ourselves... - * See Documentation/RCU/stallwarn.txt for info on how to debug - * RCU CPU stall warnings. - */ - pr_err("INFO: %s self-detected stall on CPU", rcu_state.name); - print_cpu_stall_info_begin(); - raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); - print_cpu_stall_info(smp_processor_id()); - raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); - print_cpu_stall_info_end(); - for_each_possible_cpu(cpu) - totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", - jiffies - rcu_state.gp_start, - (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); - - rcu_check_gp_kthread_starvation(); - - rcu_dump_cpu_stacks(); - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - /* Rewrite if needed in case of slow consoles. */ - if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) - WRITE_ONCE(rcu_state.jiffies_stall, - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - - panic_on_rcu_stall(); - - /* - * Attempt to revive the RCU machinery by forcing a context switch. - * - * A context switch would normally allow the RCU state machine to make - * progress and it could be we're stuck in kernel space without context - * switches for an entirely unreasonable amount of time. - */ - set_tsk_need_resched(current); - set_preempt_need_resched(); -} - -static void check_cpu_stall(struct rcu_data *rdp) -{ - unsigned long gs1; - unsigned long gs2; - unsigned long gps; - unsigned long j; - unsigned long jn; - unsigned long js; - struct rcu_node *rnp; - - if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || - !rcu_gp_in_progress()) - return; - rcu_stall_kick_kthreads(); - j = jiffies; - - /* - * Lots of memory barriers to reject false positives. - * - * The idea is to pick up rcu_state.gp_seq, then - * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally - * another copy of rcu_state.gp_seq. These values are updated in - * the opposite order with memory barriers (or equivalent) during - * grace-period initialization and cleanup. Now, a false positive - * can occur if we get an new value of rcu_state.gp_start and a old - * value of rcu_state.jiffies_stall. But given the memory barriers, - * the only way that this can happen is if one grace period ends - * and another starts between these two fetches. 
This is detected - * by comparing the second fetch of rcu_state.gp_seq with the - * previous fetch from rcu_state.gp_seq. - * - * Given this check, comparisons of jiffies, rcu_state.jiffies_stall, - * and rcu_state.gp_start suffice to forestall false positives. - */ - gs1 = READ_ONCE(rcu_state.gp_seq); - smp_rmb(); /* Pick up ->gp_seq first... */ - js = READ_ONCE(rcu_state.jiffies_stall); - smp_rmb(); /* ...then ->jiffies_stall before the rest... */ - gps = READ_ONCE(rcu_state.gp_start); - smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ - gs2 = READ_ONCE(rcu_state.gp_seq); - if (gs1 != gs2 || - ULONG_CMP_LT(j, js) || - ULONG_CMP_GE(gps, js)) - return; /* No stall or GP completed since entering function. */ - rnp = rdp->mynode; - jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; - if (rcu_gp_in_progress() && - (READ_ONCE(rnp->qsmask) & rdp->grpmask) && - cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { - - /* We haven't checked in, so go dump stack. */ - print_cpu_stall(); - - } else if (rcu_gp_in_progress() && - ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && - cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { - - /* They had a few time units to dump stack, so complain. */ - print_other_cpu_stall(gs2); - } -} - -/** - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period - * - * Set the stall-warning timeout way off into the future, thus preventing - * any RCU CPU stall-warning messages from appearing in the current set of - * RCU grace periods. - * - * The caller must disable hard irqs. - */ -void rcu_cpu_stall_reset(void) -{ - WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); -} - /* Trace-event wrapper function for trace_rcu_future_grace_period. */ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, unsigned long gp_seq_req, const char *s) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bb4f995f2d3f2..3c4e26fff8069 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -393,15 +393,13 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; int rcu_dynticks_snap(struct rcu_data *rdp); -/* Forward declarations for rcutree_plugin.h */ +/* Forward declarations for tree_plugin.h */ static void rcu_bootup_announce(void); static void rcu_qs(void); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static bool rcu_preempt_has_tasks(struct rcu_node *rnp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_print_detail_task_stall(void); -static int rcu_print_task_stall(struct rcu_node *rnp); static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_flavor_sched_clock_irq(int user); @@ -445,3 +443,9 @@ static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); + +/* Forward declarations for tree_stall.h */ +static void rcu_print_detail_task_stall(void); +static int rcu_print_task_stall(struct rcu_node *rnp); +static void record_gp_stall_check_time(void); +static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6f5f94944f49e..e0e73f4933639 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -8,6 +8,9 @@ */ +/* panic() on RCU Stall sysctl. 
*/ +int sysctl_panic_on_rcu_stall __read_mostly; + #ifdef CONFIG_PROVE_RCU #define RCU_STALL_DELAY_DELTA (5 * HZ) #else @@ -156,3 +159,292 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } #endif /* #else #ifdef CONFIG_PREEMPT */ + +static void record_gp_stall_check_time(void) +{ + unsigned long j = jiffies; + unsigned long j1; + + rcu_state.gp_start = j; + j1 = rcu_jiffies_till_stall_check(); + /* Record ->gp_start before ->jiffies_stall. */ + smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ + rcu_state.jiffies_resched = j + j1 / 2; + rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); +} + +/* + * Complain about starvation of grace-period kthread. + */ +static void rcu_check_gp_kthread_starvation(void) +{ + struct task_struct *gpk = rcu_state.gp_kthread; + unsigned long j; + + j = jiffies - READ_ONCE(rcu_state.gp_activity); + if (j > 2 * HZ) { + pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", + rcu_state.name, j, + (long)rcu_seq_current(&rcu_state.gp_seq), + READ_ONCE(rcu_state.gp_flags), + gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, + gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); + if (gpk) { + pr_err("RCU grace-period kthread stack dump:\n"); + sched_show_task(gpk); + wake_up_process(gpk); + } + } +} + +/* + * Dump stacks of all tasks running on stalled CPUs. First try using + * NMIs, but fall back to manual remote stack tracing on architectures + * that don't support NMI-based stack dumps. The NMI-triggered stack + * traces are more accurate because they are printed by the target CPU. + */ +static void rcu_dump_cpu_stacks(void) +{ + int cpu; + unsigned long flags; + struct rcu_node *rnp; + + rcu_for_each_leaf_node(rnp) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + for_each_leaf_node_possible_cpu(rnp, cpu) + if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } +} + +/* + * If too much time has passed in the current grace period, and if + * so configured, go kick the relevant kthreads. + */ +static void rcu_stall_kick_kthreads(void) +{ + unsigned long j; + + if (!rcu_kick_kthreads) + return; + j = READ_ONCE(rcu_state.jiffies_kick_kthreads); + if (time_after(jiffies, j) && rcu_state.gp_kthread && + (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { + WARN_ONCE(1, "Kicking %s grace-period kthread\n", + rcu_state.name); + rcu_ftrace_dump(DUMP_ALL); + wake_up_process(rcu_state.gp_kthread); + WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); + } +} + +static void panic_on_rcu_stall(void) +{ + if (sysctl_panic_on_rcu_stall) + panic("RCU Stall\n"); +} + +static void print_other_cpu_stall(unsigned long gp_seq) +{ + int cpu; + unsigned long flags; + unsigned long gpa; + unsigned long j; + int ndetected = 0; + struct rcu_node *rnp = rcu_get_root(); + long totqlen = 0; + + /* Kick and suppress, if so configured. */ + rcu_stall_kick_kthreads(); + if (rcu_cpu_stall_suppress) + return; + + /* + * OK, time to rat on our buddy... + * See Documentation/RCU/stallwarn.txt for info on how to debug + * RCU CPU stall warnings. 
+ */ + pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name); + print_cpu_stall_info_begin(); + rcu_for_each_leaf_node(rnp) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + ndetected += rcu_print_task_stall(rnp); + if (rnp->qsmask != 0) { + for_each_leaf_node_possible_cpu(rnp, cpu) + if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { + print_cpu_stall_info(cpu); + ndetected++; + } + } + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } + + print_cpu_stall_info_end(); + for_each_possible_cpu(cpu) + totqlen += rcu_get_n_cbs_cpu(cpu); + pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", + smp_processor_id(), (long)(jiffies - rcu_state.gp_start), + (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); + if (ndetected) { + rcu_dump_cpu_stacks(); + + /* Complain about tasks blocking the grace period. */ + rcu_print_detail_task_stall(); + } else { + if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { + pr_err("INFO: Stall ended before state dump start\n"); + } else { + j = jiffies; + gpa = READ_ONCE(rcu_state.gp_activity); + pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", + rcu_state.name, j - gpa, j, gpa, + READ_ONCE(jiffies_till_next_fqs), + rcu_get_root()->qsmask); + /* In this case, the current CPU might be at fault. */ + sched_show_task(current); + } + } + /* Rewrite if needed in case of slow consoles. */ + if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); + + rcu_check_gp_kthread_starvation(); + + panic_on_rcu_stall(); + + rcu_force_quiescent_state(); /* Kick them all. */ +} + +static void print_cpu_stall(void) +{ + int cpu; + unsigned long flags; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp = rcu_get_root(); + long totqlen = 0; + + /* Kick and suppress, if so configured. */ + rcu_stall_kick_kthreads(); + if (rcu_cpu_stall_suppress) + return; + + /* + * OK, time to rat on ourselves... + * See Documentation/RCU/stallwarn.txt for info on how to debug + * RCU CPU stall warnings. + */ + pr_err("INFO: %s self-detected stall on CPU", rcu_state.name); + print_cpu_stall_info_begin(); + raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); + print_cpu_stall_info(smp_processor_id()); + raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); + print_cpu_stall_info_end(); + for_each_possible_cpu(cpu) + totqlen += rcu_get_n_cbs_cpu(cpu); + pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", + jiffies - rcu_state.gp_start, + (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); + + rcu_check_gp_kthread_starvation(); + + rcu_dump_cpu_stacks(); + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + /* Rewrite if needed in case of slow consoles. */ + if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall))) + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + panic_on_rcu_stall(); + + /* + * Attempt to revive the RCU machinery by forcing a context switch. + * + * A context switch would normally allow the RCU state machine to make + * progress and it could be we're stuck in kernel space without context + * switches for an entirely unreasonable amount of time. 
+ */ + set_tsk_need_resched(current); + set_preempt_need_resched(); +} + +static void check_cpu_stall(struct rcu_data *rdp) +{ + unsigned long gs1; + unsigned long gs2; + unsigned long gps; + unsigned long j; + unsigned long jn; + unsigned long js; + struct rcu_node *rnp; + + if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || + !rcu_gp_in_progress()) + return; + rcu_stall_kick_kthreads(); + j = jiffies; + + /* + * Lots of memory barriers to reject false positives. + * + * The idea is to pick up rcu_state.gp_seq, then + * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally + * another copy of rcu_state.gp_seq. These values are updated in + * the opposite order with memory barriers (or equivalent) during + * grace-period initialization and cleanup. Now, a false positive + * can occur if we get an new value of rcu_state.gp_start and a old + * value of rcu_state.jiffies_stall. But given the memory barriers, + * the only way that this can happen is if one grace period ends + * and another starts between these two fetches. This is detected + * by comparing the second fetch of rcu_state.gp_seq with the + * previous fetch from rcu_state.gp_seq. + * + * Given this check, comparisons of jiffies, rcu_state.jiffies_stall, + * and rcu_state.gp_start suffice to forestall false positives. + */ + gs1 = READ_ONCE(rcu_state.gp_seq); + smp_rmb(); /* Pick up ->gp_seq first... */ + js = READ_ONCE(rcu_state.jiffies_stall); + smp_rmb(); /* ...then ->jiffies_stall before the rest... */ + gps = READ_ONCE(rcu_state.gp_start); + smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ + gs2 = READ_ONCE(rcu_state.gp_seq); + if (gs1 != gs2 || + ULONG_CMP_LT(j, js) || + ULONG_CMP_GE(gps, js)) + return; /* No stall or GP completed since entering function. */ + rnp = rdp->mynode; + jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + if (rcu_gp_in_progress() && + (READ_ONCE(rnp->qsmask) & rdp->grpmask) && + cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + + /* We haven't checked in, so go dump stack. */ + print_cpu_stall(); + + } else if (rcu_gp_in_progress() && + ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && + cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + + /* They had a few time units to dump stack, so complain. */ + print_other_cpu_stall(gs2); + } +} + +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. + */ +void rcu_cpu_stall_reset(void) +{ + WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); +} -- GitLab From 21d0d79ab051bf9facb9960a30e58b93a31c75a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 20:36:45 -0800 Subject: [PATCH 0220/1861] rcu: Inline RCU task stall-warning helper functions The rcu_print_detail_task_stall(), rcu_print_task_stall_begin(), and rcu_print_task_stall_end() functions were defined to allow long-gone Kconfig options to provide an abbreviated RCU CPU stall warning printout. This commit saves a few lines of code by inlining them into their sole callers. While in the area, a useless call of rcu_print_detail_task_stall_rnp() on the root rcu_node structure was eliminated. If there is only one rcu_node structure, its tasks get printed twice, but if there are more, the root rcu_node structure is guaranteed to have an empty list of blocked tasks, hence the uselessness. 
(Long ago, root rcu_node structures with non-empty ->blkd_tasks lists could happen, but no longer.) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_stall.h | 36 +++++++----------------------------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 3c4e26fff8069..c6df9a13dd06c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -445,7 +445,6 @@ static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); /* Forward declarations for tree_stall.h */ -static void rcu_print_detail_task_stall(void); static int rcu_print_task_stall(struct rcu_node *rnp); static void record_gp_stall_check_time(void); static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index e0e73f4933639..b476786b8ef73 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -94,30 +94,6 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } -/* - * Dump detailed information for all tasks blocking the current RCU - * grace period. - */ -static void rcu_print_detail_task_stall(void) -{ - struct rcu_node *rnp = rcu_get_root(); - - rcu_print_detail_task_stall_rnp(rnp); - rcu_for_each_leaf_node(rnp) - rcu_print_detail_task_stall_rnp(rnp); -} - -static void rcu_print_task_stall_begin(struct rcu_node *rnp) -{ - pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", - rnp->level, rnp->grplo, rnp->grphi); -} - -static void rcu_print_task_stall_end(void) -{ - pr_cont("\n"); -} - /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. @@ -129,14 +105,15 @@ static int rcu_print_task_stall(struct rcu_node *rnp) if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; - rcu_print_task_stall_begin(rnp); + pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", + rnp->level, rnp->grplo, rnp->grphi); t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { pr_cont(" P%d", t->pid); ndetected++; } - rcu_print_task_stall_end(); + pr_cont("\n"); return ndetected; } @@ -146,7 +123,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. */ -static void rcu_print_detail_task_stall(void) +static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) { } @@ -253,7 +230,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) unsigned long gpa; unsigned long j; int ndetected = 0; - struct rcu_node *rnp = rcu_get_root(); + struct rcu_node *rnp; long totqlen = 0; /* Kick and suppress, if so configured. */ @@ -291,7 +268,8 @@ static void print_other_cpu_stall(unsigned long gp_seq) rcu_dump_cpu_stacks(); /* Complain about tasks blocking the grace period. */ - rcu_print_detail_task_stall(); + rcu_for_each_leaf_node(rnp) + rcu_print_detail_task_stall_rnp(rnp); } else { if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) { pr_err("INFO: Stall ended before state dump start\n"); -- GitLab From d87cda5094585b7a0f62075de68266cb9c1b35ca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 20:51:49 -0800 Subject: [PATCH 0221/1861] rcu: Move rcu_print_task_exp_stall() to tree_exp.h Because expedited CPU stall warnings are contained within the kernel/rcu/tree_exp.h file, rcu_print_task_exp_stall() should live there too. 
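As a rough, hedged sketch (a userspace harness with made-up PIDs and simplified types, not the kernel's actual implementation), the list-scan idiom that function relies on resumes an intrusive-list walk from a recorded "first blocker" pointer, so only tasks still blocking the expedited grace period are printed:

/*
 * Minimal userspace sketch of the ->exp_tasks scan idiom. The kernel
 * records in rnp->exp_tasks the first task blocking the expedited grace
 * period, then walks the remainder of rnp->blkd_tasks from that point.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };
struct task { int pid; struct list_head node; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct task t[4] = { { .pid = 101 }, { .pid = 102 },
			     { .pid = 103 }, { .pid = 104 } };
	struct list_head blkd_tasks;	/* list head, like rnp->blkd_tasks */
	struct list_head *exp_tasks;	/* like rnp->exp_tasks */
	int i;

	/* Build the circular list: head <-> t0 <-> t1 <-> t2 <-> t3. */
	blkd_tasks.next = &t[0].node;
	blkd_tasks.prev = &t[3].node;
	for (i = 0; i < 4; i++) {
		t[i].node.next = (i < 3) ? &t[i + 1].node : &blkd_tasks;
		t[i].node.prev = (i > 0) ? &t[i - 1].node : &blkd_tasks;
	}

	/* Suppose only t[2] and t[3] block the expedited grace period. */
	exp_tasks = &t[2].node;

	/*
	 * Equivalent of list_entry(exp_tasks->prev, ...) followed by
	 * list_for_each_entry_continue(): visit exp_tasks through the end
	 * of the list, skipping the earlier (non-blocking) tasks.
	 */
	for (struct list_head *p = exp_tasks; p != &blkd_tasks; p = p->next)
		printf(" P%d", container_of(p, struct task, node)->pid);
	printf("\n");	/* Output: " P103 P104" */
	return 0;
}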
This commit carries out the required code motion. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 32 ++++++++++++++++++++++++++++++++ kernel/rcu/tree_plugin.h | 31 ------------------------------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 4c2a0189e7489..7be3e085ddd6b 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -10,6 +10,7 @@ #include static void rcu_exp_handler(void *unused); +static int rcu_print_task_exp_stall(struct rcu_node *rnp); /* * Record the start of an expedited grace period. @@ -670,6 +671,27 @@ static void sync_sched_exp_online_cleanup(int cpu) { } +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each that is blocking the current + * expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + struct task_struct *t; + int ndetected = 0; + + if (!rnp->exp_tasks) + return 0; + t = list_entry(rnp->exp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + pr_cont(" P%d", t->pid); + ndetected++; + } + return ndetected; +} + #else /* #ifdef CONFIG_PREEMPT_RCU */ /* Invoked on each online non-idle CPU for expedited quiescent state. */ @@ -709,6 +731,16 @@ static void sync_sched_exp_online_cleanup(int cpu) WARN_ON_ONCE(ret); } +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections that are + * blocking the current expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /** diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7fa3bc4d481bf..72519c57f6569 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -642,27 +642,6 @@ static void rcu_read_unlock_special(struct task_struct *t) rcu_preempt_deferred_qs_irqrestore(t, flags); } -/* - * Scan the current list of tasks blocked within RCU read-side critical - * sections, printing out the tid of each that is blocking the current - * expedited grace period. - */ -static int rcu_print_task_exp_stall(struct rcu_node *rnp) -{ - struct task_struct *t; - int ndetected = 0; - - if (!rnp->exp_tasks) - return 0; - t = list_entry(rnp->exp_tasks->prev, - struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { - pr_cont(" P%d", t->pid); - ndetected++; - } - return ndetected; -} - /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -906,16 +885,6 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) } static void rcu_preempt_deferred_qs(struct task_struct *t) { } -/* - * Because preemptible RCU does not exist, we never have to check for - * tasks blocked within RCU read-side critical sections that are - * blocking the current expedited grace period. - */ -static int rcu_print_task_exp_stall(struct rcu_node *rnp) -{ - return 0; -} - /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for -- GitLab From 40e69ac7d0c5a19ea14656bc3131c55719baec96 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 11 Jan 2019 20:58:58 -0800 Subject: [PATCH 0222/1861] rcu: Inline RCU stall-warning info helper functions The print_cpu_stall_info_begin() and print_cpu_stall_info_end() print a single character each onto the console, and are a holdover from a time when RCU CPU stall warning messages could be abbreviated using a long-gone Kconfig option. This commit therefore adds these single characters to already-printed strings in the calling functions, and then eliminates both print_cpu_stall_info_begin() and print_cpu_stall_info_end(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 2 -- kernel/rcu/tree_plugin.h | 12 ------------ kernel/rcu/tree_stall.h | 12 ++++-------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index c6df9a13dd06c..d73472af49e70 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -416,9 +416,7 @@ static void rcu_prepare_for_idle(void); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); static void rcu_preempt_deferred_qs(struct task_struct *t); -static void print_cpu_stall_info_begin(void); static void print_cpu_stall_info(int cpu); -static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static bool rcu_nocb_cpu_needs_barrier(int cpu); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 72519c57f6569..2df5bb04fd7a3 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1550,12 +1550,6 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ -/* Initiate the stall-info list. */ -static void print_cpu_stall_info_begin(void) -{ - pr_cont("\n"); -} - /* * Print out diagnostic information for the specified stalled CPU. * @@ -1606,12 +1600,6 @@ static void print_cpu_stall_info(int cpu) fast_no_hz); } -/* Terminate the stall-info list. */ -static void print_cpu_stall_info_end(void) -{ - pr_err("\t"); -} - /* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ static void zero_cpu_stall_ticks(struct rcu_data *rdp) { diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index b476786b8ef73..7ef3b596e45ff 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -243,8 +243,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) * See Documentation/RCU/stallwarn.txt for info on how to debug * RCU CPU stall warnings. */ - pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name); - print_cpu_stall_info_begin(); + pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name); rcu_for_each_leaf_node(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); ndetected += rcu_print_task_stall(rnp); @@ -258,10 +257,9 @@ static void print_other_cpu_stall(unsigned long gp_seq) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", + pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rcu_state.gp_start), (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); if (ndetected) { @@ -314,15 +312,13 @@ static void print_cpu_stall(void) * See Documentation/RCU/stallwarn.txt for info on how to debug * RCU CPU stall warnings. 
*/ - pr_err("INFO: %s self-detected stall on CPU", rcu_state.name); - print_cpu_stall_info_begin(); + pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); print_cpu_stall_info(smp_processor_id()); raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); - print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += rcu_get_n_cbs_cpu(cpu); - pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", + pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n", jiffies - rcu_state.gp_start, (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); -- GitLab From 59b73a27681c5841440391f970a9a085228ba975 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 11 Jan 2019 21:05:17 -0800 Subject: [PATCH 0223/1861] rcu: Move FAST_NO_HZ stall-warning code to tree_stall.h This commit further consolidates the stall-warning code by moving print_cpu_stall_info() and its helper functions along with zero_cpu_stall_ticks() to kernel/rcu/tree_stall.h. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_plugin.h | 80 ---------------------------------------- kernel/rcu/tree_stall.h | 80 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 81 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d73472af49e70..49bf3b00bb503 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -416,7 +416,6 @@ static void rcu_prepare_for_idle(void); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); static void rcu_preempt_deferred_qs(struct task_struct *t); -static void print_cpu_stall_info(int cpu); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static bool rcu_nocb_cpu_needs_barrier(int cpu); static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2df5bb04fd7a3..a1f9d7c15bd82 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1528,86 +1528,6 @@ static void rcu_cleanup_after_idle(void) #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -#ifdef CONFIG_RCU_FAST_NO_HZ - -static void print_cpu_stall_fast_no_hz(char *cp, int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - - sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c", - rdp->last_accelerate & 0xffff, jiffies & 0xffff, - ".l"[rdp->all_lazy], - ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], - ".D"[!rdp->tick_nohz_enabled_snap]); -} - -#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ - -static void print_cpu_stall_fast_no_hz(char *cp, int cpu) -{ - *cp = '\0'; -} - -#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ - -/* - * Print out diagnostic information for the specified stalled CPU. - * - * If the specified CPU is aware of the current RCU grace period, then - * print the number of scheduling clock interrupts the CPU has taken - * during the time that it has been aware. Otherwise, print the number - * of RCU grace periods that this CPU is ignorant of, for example, "1" - * if the CPU was aware of the previous grace period. - * - * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. - */ -static void print_cpu_stall_info(int cpu) -{ - unsigned long delta; - char fast_no_hz[72]; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - char *ticks_title; - unsigned long ticks_value; - - /* - * We could be printing a lot while holding a spinlock. Avoid - * triggering hard lockup. 
- */ - touch_nmi_watchdog(); - - ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq); - if (ticks_value) { - ticks_title = "GPs behind"; - } else { - ticks_title = "ticks this GP"; - ticks_value = rdp->ticks_this_gp; - } - print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", - cpu, - "O."[!!cpu_online(cpu)], - "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], - "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], - !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' : - rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : - "!."[!delta], - ticks_value, ticks_title, - rcu_dynticks_snap(rdp) & 0xfff, - rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, - rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), - READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, - fast_no_hz); -} - -/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ -static void zero_cpu_stall_ticks(struct rcu_data *rdp) -{ - rdp->ticks_this_gp = 0; - rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); - WRITE_ONCE(rdp->last_fqs_resched, jiffies); -} - #ifdef CONFIG_RCU_NOCB_CPU /* diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 7ef3b596e45ff..19b915380a6fc 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -223,6 +223,86 @@ static void panic_on_rcu_stall(void) panic("RCU Stall\n"); } +#ifdef CONFIG_RCU_FAST_NO_HZ + +static void print_cpu_stall_fast_no_hz(char *cp, int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + + sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c", + rdp->last_accelerate & 0xffff, jiffies & 0xffff, + ".l"[rdp->all_lazy], + ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], + ".D"[!rdp->tick_nohz_enabled_snap]); +} + +#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ + +static void print_cpu_stall_fast_no_hz(char *cp, int cpu) +{ + *cp = '\0'; +} + +#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ + +/* + * Print out diagnostic information for the specified stalled CPU. + * + * If the specified CPU is aware of the current RCU grace period, then + * print the number of scheduling clock interrupts the CPU has taken + * during the time that it has been aware. Otherwise, print the number + * of RCU grace periods that this CPU is ignorant of, for example, "1" + * if the CPU was aware of the previous grace period. + * + * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. + */ +static void print_cpu_stall_info(int cpu) +{ + unsigned long delta; + char fast_no_hz[72]; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + char *ticks_title; + unsigned long ticks_value; + + /* + * We could be printing a lot while holding a spinlock. Avoid + * triggering hard lockup. + */ + touch_nmi_watchdog(); + + ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq); + if (ticks_value) { + ticks_title = "GPs behind"; + } else { + ticks_title = "ticks this GP"; + ticks_value = rdp->ticks_this_gp; + } + print_cpu_stall_fast_no_hz(fast_no_hz, cpu); + delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", + cpu, + "O."[!!cpu_online(cpu)], + "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], + "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], + !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' : + rdp->rcu_iw_pending ? 
(int)min(delta, 9UL) + '0' : + "!."[!delta], + ticks_value, ticks_title, + rcu_dynticks_snap(rdp) & 0xfff, + rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, + rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), + READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, + fast_no_hz); +} + +/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ +static void zero_cpu_stall_ticks(struct rcu_data *rdp) +{ + rdp->ticks_this_gp = 0; + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); + WRITE_ONCE(rdp->last_fqs_resched, jiffies); +} + static void print_other_cpu_stall(unsigned long gp_seq) { int cpu; -- GitLab From e23344c2ca42d0083596bb39964675bef00ad691 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 12 Jan 2019 09:35:44 -0800 Subject: [PATCH 0224/1861] rcu: Organize functions in tree_stall.h This commit does only code movement, removal of now-unneeded forward declarations, and addition of comments. It organizes the functions that implement RCU CPU stall warnings for normal grace periods into three categories: 1. Control of RCU CPU stall warnings, including computing timeouts. 2. Interaction of stall warnings with grace periods. 3. Actual printing of the RCU CPU stall-warning messages. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_stall.h | 180 ++++++++++++++++++++++------------------ 2 files changed, 97 insertions(+), 84 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 49bf3b00bb503..099410dbcbe99 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -442,6 +442,5 @@ static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); /* Forward declarations for tree_stall.h */ -static int rcu_print_task_stall(struct rcu_node *rnp); static void record_gp_stall_check_time(void); static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 19b915380a6fc..03ed47883d8ab 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,9 @@ * Author: Paul E. McKenney */ +////////////////////////////////////////////////////////////////////////////// +// +// Controlling CPU stall warnings, including delay calculation. /* panic() on RCU Stall sysctl. */ int sysctl_panic_on_rcu_stall __read_mostly; @@ -17,6 +20,7 @@ int sysctl_panic_on_rcu_stall __read_mostly; #define RCU_STALL_DELAY_DELTA 0 #endif +/* Limit-check stall timeouts specified at boottime and runtime. */ int rcu_jiffies_till_stall_check(void) { int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); @@ -36,6 +40,7 @@ int rcu_jiffies_till_stall_check(void) } EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check); +/* Don't do RCU CPU stall warnings during long sysrq printouts. */ void rcu_sysrq_start(void) { if (!rcu_cpu_stall_suppress) @@ -48,6 +53,7 @@ void rcu_sysrq_end(void) rcu_cpu_stall_suppress = 0; } +/* Don't print RCU CPU stall warnings during a kernel panic. */ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) { rcu_cpu_stall_suppress = 1; @@ -65,6 +71,78 @@ static int __init check_cpu_stall_init(void) } early_initcall(check_cpu_stall_init); +/* If so specified via sysctl, panic, yielding cleaner stall-warning output. 
*/ +static void panic_on_rcu_stall(void) +{ + if (sysctl_panic_on_rcu_stall) + panic("RCU Stall\n"); +} + +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. + */ +void rcu_cpu_stall_reset(void) +{ + WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Interaction with RCU grace periods + +/* Start of new grace period, so record stall time (and forcing times). */ +static void record_gp_stall_check_time(void) +{ + unsigned long j = jiffies; + unsigned long j1; + + rcu_state.gp_start = j; + j1 = rcu_jiffies_till_stall_check(); + /* Record ->gp_start before ->jiffies_stall. */ + smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ + rcu_state.jiffies_resched = j + j1 / 2; + rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); +} + +/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ +static void zero_cpu_stall_ticks(struct rcu_data *rdp) +{ + rdp->ticks_this_gp = 0; + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); + WRITE_ONCE(rdp->last_fqs_resched, jiffies); +} + +/* + * If too much time has passed in the current grace period, and if + * so configured, go kick the relevant kthreads. + */ +static void rcu_stall_kick_kthreads(void) +{ + unsigned long j; + + if (!rcu_kick_kthreads) + return; + j = READ_ONCE(rcu_state.jiffies_kick_kthreads); + if (time_after(jiffies, j) && rcu_state.gp_kthread && + (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { + WARN_ONCE(1, "Kicking %s grace-period kthread\n", + rcu_state.name); + rcu_ftrace_dump(DUMP_ALL); + wake_up_process(rcu_state.gp_kthread); + WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); + } +} + +////////////////////////////////////////////////////////////////////////////// +// +// Printing RCU CPU stall warnings + #ifdef CONFIG_PREEMPT /* @@ -137,43 +215,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) } #endif /* #else #ifdef CONFIG_PREEMPT */ -static void record_gp_stall_check_time(void) -{ - unsigned long j = jiffies; - unsigned long j1; - - rcu_state.gp_start = j; - j1 = rcu_jiffies_till_stall_check(); - /* Record ->gp_start before ->jiffies_stall. */ - smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */ - rcu_state.jiffies_resched = j + j1 / 2; - rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); -} - -/* - * Complain about starvation of grace-period kthread. - */ -static void rcu_check_gp_kthread_starvation(void) -{ - struct task_struct *gpk = rcu_state.gp_kthread; - unsigned long j; - - j = jiffies - READ_ONCE(rcu_state.gp_activity); - if (j > 2 * HZ) { - pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", - rcu_state.name, j, - (long)rcu_seq_current(&rcu_state.gp_seq), - READ_ONCE(rcu_state.gp_flags), - gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); - if (gpk) { - pr_err("RCU grace-period kthread stack dump:\n"); - sched_show_task(gpk); - wake_up_process(gpk); - } - } -} - /* * Dump stacks of all tasks running on stalled CPUs. 
First try using * NMIs, but fall back to manual remote stack tracing on architectures @@ -196,33 +237,6 @@ static void rcu_dump_cpu_stacks(void) } } -/* - * If too much time has passed in the current grace period, and if - * so configured, go kick the relevant kthreads. - */ -static void rcu_stall_kick_kthreads(void) -{ - unsigned long j; - - if (!rcu_kick_kthreads) - return; - j = READ_ONCE(rcu_state.jiffies_kick_kthreads); - if (time_after(jiffies, j) && rcu_state.gp_kthread && - (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) { - WARN_ONCE(1, "Kicking %s grace-period kthread\n", - rcu_state.name); - rcu_ftrace_dump(DUMP_ALL); - wake_up_process(rcu_state.gp_kthread); - WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ); - } -} - -static void panic_on_rcu_stall(void) -{ - if (sysctl_panic_on_rcu_stall) - panic("RCU Stall\n"); -} - #ifdef CONFIG_RCU_FAST_NO_HZ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) @@ -295,12 +309,26 @@ static void print_cpu_stall_info(int cpu) fast_no_hz); } -/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */ -static void zero_cpu_stall_ticks(struct rcu_data *rdp) +/* Complain about starvation of grace-period kthread. */ +static void rcu_check_gp_kthread_starvation(void) { - rdp->ticks_this_gp = 0; - rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); - WRITE_ONCE(rdp->last_fqs_resched, jiffies); + struct task_struct *gpk = rcu_state.gp_kthread; + unsigned long j; + + j = jiffies - READ_ONCE(rcu_state.gp_activity); + if (j > 2 * HZ) { + pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", + rcu_state.name, j, + (long)rcu_seq_current(&rcu_state.gp_seq), + READ_ONCE(rcu_state.gp_flags), + gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, + gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1); + if (gpk) { + pr_err("RCU grace-period kthread stack dump:\n"); + sched_show_task(gpk); + wake_up_process(gpk); + } + } } static void print_other_cpu_stall(unsigned long gp_seq) @@ -488,17 +516,3 @@ static void check_cpu_stall(struct rcu_data *rdp) print_other_cpu_stall(gs2); } } - -/** - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period - * - * Set the stall-warning timeout way off into the future, thus preventing - * any RCU CPU stall-warning messages from appearing in the current set of - * RCU grace periods. - * - * The caller must disable hard irqs. - */ -void rcu_cpu_stall_reset(void) -{ - WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); -} -- GitLab From 7ac1907c9e7ba5f80d3c298fe9d2dbf620566a49 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jan 2019 10:19:20 -0800 Subject: [PATCH 0225/1861] rcu: Move irq-disabled stall-warning checking to tree_stall.h The rcu_iw_handler() function's sole purpose in life is to indicate whether a stalled CPU had interrupts disabled, so it belongs in kernel/rcu/tree_stall.h. This commit therefore makes that move, clarifying its header comment while in the area. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 21 --------------------- kernel/rcu/tree.h | 1 + kernel/rcu/tree_stall.h | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 001dd05f6e385..929531ed168c0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1031,27 +1031,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) return 0; } -/* - * Handler for the irq_work request posted when a grace period has - * gone on for too long, but not yet long enough for an RCU CPU - * stall warning. Set state appropriately, but just complain if - * there is unexpected state on entry. - */ -static void rcu_iw_handler(struct irq_work *iwp) -{ - struct rcu_data *rdp; - struct rcu_node *rnp; - - rdp = container_of(iwp, struct rcu_data, rcu_iw); - rnp = rdp->mynode; - raw_spin_lock_rcu_node(rnp); - if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { - rdp->rcu_iw_gp_seq = rnp->gp_seq; - rdp->rcu_iw_pending = false; - } - raw_spin_unlock_rcu_node(rnp); -} - /* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 099410dbcbe99..f882ce3ca5a56 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -443,4 +443,5 @@ static void rcu_dynticks_task_exit(void); /* Forward declarations for tree_stall.h */ static void record_gp_stall_check_time(void); +static void rcu_iw_handler(struct irq_work *iwp); static void check_cpu_stall(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 03ed47883d8ab..526e223e41ce2 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -139,6 +139,26 @@ static void rcu_stall_kick_kthreads(void) } } +/* + * Handler for the irq_work request posted about halfway into the RCU CPU + * stall timeout, and used to detect excessive irq disabling. Set state + * appropriately, but just complain if there is unexpected state on entry. + */ +static void rcu_iw_handler(struct irq_work *iwp) +{ + struct rcu_data *rdp; + struct rcu_node *rnp; + + rdp = container_of(iwp, struct rcu_data, rcu_iw); + rnp = rdp->mynode; + raw_spin_lock_rcu_node(rnp); + if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { + rdp->rcu_iw_gp_seq = rnp->gp_seq; + rdp->rcu_iw_pending = false; + } + raw_spin_unlock_rcu_node(rnp); +} + ////////////////////////////////////////////////////////////////////////////// // // Printing RCU CPU stall warnings -- GitLab From b51bcbbf16ef0ea352e8b924dd8638112e4037a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Jan 2019 07:01:33 -0800 Subject: [PATCH 0226/1861] rcu: Move forward-progress checkers into tree_stall.h This commit further consolidates stall-warning functionality by moving forward-progress checkers into kernel/rcu/tree_stall.h, updating a comment or two while in the area. More specifically, this commit moves show_rcu_gp_kthreads(), rcu_check_gp_start_stall(), rcu_fwd_progress_check(), sysrq_rcu, sysrq_show_rcu(), sysrq_rcudump_op, and rcu_sysrq_init() from kernel/rcu/tree.c to kernel/rcu/tree_stall.h. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 166 -------------------------------------- kernel/rcu/tree.h | 2 + kernel/rcu/tree_stall.h | 171 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 166 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 929531ed168c0..4cb1ebd93b0ce 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -102,9 +102,6 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; /* Number of rcu_nodes at specified level. */ int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ -/* Commandeer a sysrq key to dump RCU's tree. */ -static bool sysrq_rcu; -module_param(sysrq_rcu, bool, 0444); /* * The rcu_scheduler_active variable is initialized to the value @@ -510,74 +507,6 @@ static const char *gp_state_getname(short gs) return gp_state_names[gs]; } -/* - * Show the state of the grace-period kthreads. - */ -void show_rcu_gp_kthreads(void) -{ - int cpu; - unsigned long j; - unsigned long ja; - unsigned long jr; - unsigned long jw; - struct rcu_data *rdp; - struct rcu_node *rnp; - - j = jiffies; - ja = j - READ_ONCE(rcu_state.gp_activity); - jr = j - READ_ONCE(rcu_state.gp_req_activity); - jw = j - READ_ONCE(rcu_state.gp_wake_time); - pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", - rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, - rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL, - ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), - (long)READ_ONCE(rcu_state.gp_seq), - (long)READ_ONCE(rcu_get_root()->gp_seq_needed), - READ_ONCE(rcu_state.gp_flags)); - rcu_for_each_node_breadth_first(rnp) { - if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) - continue; - pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", - rnp->grplo, rnp->grphi, (long)rnp->gp_seq, - (long)rnp->gp_seq_needed); - if (!rcu_is_leaf_node(rnp)) - continue; - for_each_leaf_node_possible_cpu(rnp, cpu) { - rdp = per_cpu_ptr(&rcu_data, cpu); - if (rdp->gpwrap || - ULONG_CMP_GE(rcu_state.gp_seq, - rdp->gp_seq_needed)) - continue; - pr_info("\tcpu %d ->gp_seq_needed %ld\n", - cpu, (long)rdp->gp_seq_needed); - } - } - /* sched_show_task(rcu_state.gp_kthread); */ -} -EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); - -/* Dump grace-period-request information due to commandeered sysrq. */ -static void sysrq_show_rcu(int key) -{ - show_rcu_gp_kthreads(); -} - -static struct sysrq_key_op sysrq_rcudump_op = { - .handler = sysrq_show_rcu, - .help_msg = "show-rcu(y)", - .action_msg = "Show RCU tree", - .enable_mask = SYSRQ_ENABLE_DUMP, -}; - -static int __init rcu_sysrq_init(void) -{ - if (sysrq_rcu) - return register_sysrq_key('y', &sysrq_rcudump_op); - return 0; -} -early_initcall(rcu_sysrq_init); - /* * Send along grace-period-related data for rcutorture diagnostics. */ @@ -2323,101 +2252,6 @@ void rcu_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); -/* - * This function checks for grace-period requests that fail to motivate - * RCU to come out of its idle mode. 
- */ -void -rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, - const unsigned long gpssdelay) -{ - unsigned long flags; - unsigned long j; - struct rcu_node *rnp_root = rcu_get_root(); - static atomic_t warned = ATOMIC_INIT(0); - - if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) - return; - j = jiffies; /* Expensive access, and in common case don't get here. */ - if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || - time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || - atomic_read(&warned)) - return; - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - j = jiffies; - if (rcu_gp_in_progress() || - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || - time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || - time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || - atomic_read(&warned)) { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - return; - } - /* Hold onto the leaf lock to make others see warned==1. */ - - if (rnp_root != rnp) - raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ - j = jiffies; - if (rcu_gp_in_progress() || - ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || - time_before(j, rcu_state.gp_req_activity + gpssdelay) || - time_before(j, rcu_state.gp_activity + gpssdelay) || - atomic_xchg(&warned, 1)) { - raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - return; - } - WARN_ON(1); - if (rnp_root != rnp) - raw_spin_unlock_rcu_node(rnp_root); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - show_rcu_gp_kthreads(); -} - -/* - * Do a forward-progress check for rcutorture. This is normally invoked - * due to an OOM event. The argument "j" gives the time period during - * which rcutorture would like progress to have been made. - */ -void rcu_fwd_progress_check(unsigned long j) -{ - unsigned long cbs; - int cpu; - unsigned long max_cbs = 0; - int max_cpu = -1; - struct rcu_data *rdp; - - if (rcu_gp_in_progress()) { - pr_info("%s: GP age %lu jiffies\n", - __func__, jiffies - rcu_state.gp_start); - show_rcu_gp_kthreads(); - } else { - pr_info("%s: Last GP end %lu jiffies ago\n", - __func__, jiffies - rcu_state.gp_end); - preempt_disable(); - rdp = this_cpu_ptr(&rcu_data); - rcu_check_gp_start_stall(rdp->mynode, rdp, j); - preempt_enable(); - } - for_each_possible_cpu(cpu) { - cbs = rcu_get_n_cbs_cpu(cpu); - if (!cbs) - continue; - if (max_cpu < 0) - pr_info("%s: callbacks", __func__); - pr_cont(" %d: %lu", cpu, cbs); - if (cbs <= max_cbs) - continue; - max_cbs = cbs; - max_cpu = cpu; - } - if (max_cpu >= 0) - pr_cont("\n"); -} -EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); - /* Perform RCU core processing work for the current CPU. 
*/ static __latent_entropy void rcu_core(struct softirq_action *unused) { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index f882ce3ca5a56..e253d11af3c49 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -445,3 +445,5 @@ static void rcu_dynticks_task_exit(void); static void record_gp_stall_check_time(void); static void rcu_iw_handler(struct irq_work *iwp); static void check_cpu_stall(struct rcu_data *rdp); +static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 526e223e41ce2..9e3db08d02bc1 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -536,3 +536,174 @@ static void check_cpu_stall(struct rcu_data *rdp) print_other_cpu_stall(gs2); } } + +////////////////////////////////////////////////////////////////////////////// +// +// RCU forward-progress mechanisms, including of callback invocation. + + +/* + * Show the state of the grace-period kthreads. + */ +void show_rcu_gp_kthreads(void) +{ + int cpu; + unsigned long j; + unsigned long ja; + unsigned long jr; + unsigned long jw; + struct rcu_data *rdp; + struct rcu_node *rnp; + + j = jiffies; + ja = j - READ_ONCE(rcu_state.gp_activity); + jr = j - READ_ONCE(rcu_state.gp_req_activity); + jw = j - READ_ONCE(rcu_state.gp_wake_time); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n", + rcu_state.name, gp_state_getname(rcu_state.gp_state), + rcu_state.gp_state, + rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL, + ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq), + (long)READ_ONCE(rcu_state.gp_seq), + (long)READ_ONCE(rcu_get_root()->gp_seq_needed), + READ_ONCE(rcu_state.gp_flags)); + rcu_for_each_node_breadth_first(rnp) { + if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) + continue; + pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n", + rnp->grplo, rnp->grphi, (long)rnp->gp_seq, + (long)rnp->gp_seq_needed); + if (!rcu_is_leaf_node(rnp)) + continue; + for_each_leaf_node_possible_cpu(rnp, cpu) { + rdp = per_cpu_ptr(&rcu_data, cpu); + if (rdp->gpwrap || + ULONG_CMP_GE(rcu_state.gp_seq, + rdp->gp_seq_needed)) + continue; + pr_info("\tcpu %d ->gp_seq_needed %ld\n", + cpu, (long)rdp->gp_seq_needed); + } + } + /* sched_show_task(rcu_state.gp_kthread); */ +} +EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); + +/* + * This function checks for grace-period requests that fail to motivate + * RCU to come out of its idle mode. + */ +static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay) +{ + unsigned long flags; + unsigned long j; + struct rcu_node *rnp_root = rcu_get_root(); + static atomic_t warned = ATOMIC_INIT(0); + + if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() || + ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) + return; + j = jiffies; /* Expensive access, and in common case don't get here. 
*/ + if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || + time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || + atomic_read(&warned)) + return; + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + j = jiffies; + if (rcu_gp_in_progress() || + ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || + time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) || + time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) || + atomic_read(&warned)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + /* Hold onto the leaf lock to make others see warned==1. */ + + if (rnp_root != rnp) + raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ + j = jiffies; + if (rcu_gp_in_progress() || + ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || + time_before(j, rcu_state.gp_req_activity + gpssdelay) || + time_before(j, rcu_state.gp_activity + gpssdelay) || + atomic_xchg(&warned, 1)) { + raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + WARN_ON(1); + if (rnp_root != rnp) + raw_spin_unlock_rcu_node(rnp_root); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + show_rcu_gp_kthreads(); +} + +/* + * Do a forward-progress check for rcutorture. This is normally invoked + * due to an OOM event. The argument "j" gives the time period during + * which rcutorture would like progress to have been made. + */ +void rcu_fwd_progress_check(unsigned long j) +{ + unsigned long cbs; + int cpu; + unsigned long max_cbs = 0; + int max_cpu = -1; + struct rcu_data *rdp; + + if (rcu_gp_in_progress()) { + pr_info("%s: GP age %lu jiffies\n", + __func__, jiffies - rcu_state.gp_start); + show_rcu_gp_kthreads(); + } else { + pr_info("%s: Last GP end %lu jiffies ago\n", + __func__, jiffies - rcu_state.gp_end); + preempt_disable(); + rdp = this_cpu_ptr(&rcu_data); + rcu_check_gp_start_stall(rdp->mynode, rdp, j); + preempt_enable(); + } + for_each_possible_cpu(cpu) { + cbs = rcu_get_n_cbs_cpu(cpu); + if (!cbs) + continue; + if (max_cpu < 0) + pr_info("%s: callbacks", __func__); + pr_cont(" %d: %lu", cpu, cbs); + if (cbs <= max_cbs) + continue; + max_cbs = cbs; + max_cpu = cpu; + } + if (max_cpu >= 0) + pr_cont("\n"); +} +EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); + +/* Commandeer a sysrq key to dump RCU's tree. */ +static bool sysrq_rcu; +module_param(sysrq_rcu, bool, 0444); + +/* Dump grace-period-request information due to commandeered sysrq. */ +static void sysrq_show_rcu(int key) +{ + show_rcu_gp_kthreads(); +} + +static struct sysrq_key_op sysrq_rcudump_op = { + .handler = sysrq_show_rcu, + .help_msg = "show-rcu(y)", + .action_msg = "Show RCU tree", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init rcu_sysrq_init(void) +{ + if (sysrq_rcu) + return register_sysrq_key('y', &sysrq_rcudump_op); + return 0; +} +early_initcall(rcu_sysrq_init); -- GitLab From 6c70e9cd5f3c6d93f3e1da6d101073e898f39170 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 8 Mar 2019 11:57:48 -0800 Subject: [PATCH 0227/1861] rcu: Fix nohz status in stall warning The Documentation/RCU/stallwarn.txt file says that stall warnings print "D" if dyntick-idle processing is enabled, but the code in print_cpu_stall_fast_no_hz() prints "." instead. This commit therefore reverses the sense of the test to make the code match the documentation. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_stall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 9e3db08d02bc1..f65a73a973235 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -267,7 +267,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) rdp->last_accelerate & 0xffff, jiffies & 0xffff, ".l"[rdp->all_lazy], ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], - ".D"[!rdp->tick_nohz_enabled_snap]); + ".D"[!!rdp->tick_nohz_enabled_snap]); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -- GitLab From 24aca4aea4f0179e0e56cf9ec610c27d07702945 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Jan 2019 19:23:00 -0800 Subject: [PATCH 0228/1861] torture: Don't try to offline the last CPU If there is only one online CPU, it doesn't make sense to try to offline it, as any such attempt is guaranteed to fail. This commit therefore checks for this condition and refuses to attempt the nonsensical. Reported-by: Su Yue Signed-off-by: Paul E. McKenney Tested-By: Su Yue --- kernel/torture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/torture.c b/kernel/torture.c index 8faa1a9aaeb97..17b2be9bde12a 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -88,6 +88,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) return false; + if (num_online_cpus() <= 1) + return false; /* Can't offline the last CPU. */ if (verbose > 1) pr_alert("%s" TORTURE_FLAG -- GitLab From fef141f6195be5829c76ff061cc5263badc39a89 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2019 07:14:09 -0800 Subject: [PATCH 0229/1861] tools/.../rcutorture: Convert to SPDX license identifier Replace the license boilerplate with an SPDX license identifier. While in the area, update an email address and add copyright notices. Signed-off-by: Paul E.
McKenney --- .../selftests/rcutorture/bin/configNR_CPUS.sh | 17 ++--------------- .../rcutorture/bin/config_override.sh | 17 ++--------------- .../selftests/rcutorture/bin/configcheck.sh | 18 +++--------------- .../selftests/rcutorture/bin/configinit.sh | 17 ++--------------- .../selftests/rcutorture/bin/cpus2use.sh | 17 ++--------------- .../selftests/rcutorture/bin/functions.sh | 17 ++--------------- .../testing/selftests/rcutorture/bin/jitter.sh | 17 ++--------------- .../selftests/rcutorture/bin/kvm-build.sh | 17 ++--------------- .../rcutorture/bin/kvm-find-errors.sh | 5 +++++ .../rcutorture/bin/kvm-recheck-lock.sh | 17 ++--------------- .../rcutorture/bin/kvm-recheck-rcu.sh | 17 ++--------------- .../bin/kvm-recheck-rcuperf-ftrace.sh | 17 ++--------------- .../rcutorture/bin/kvm-recheck-rcuperf.sh | 17 ++--------------- .../selftests/rcutorture/bin/kvm-recheck.sh | 17 ++--------------- .../selftests/rcutorture/bin/kvm-test-1-run.sh | 17 ++--------------- tools/testing/selftests/rcutorture/bin/kvm.sh | 17 ++--------------- .../selftests/rcutorture/bin/mkinitrd.sh | 15 +-------------- .../selftests/rcutorture/bin/parse-build.sh | 17 ++--------------- .../selftests/rcutorture/bin/parse-console.sh | 17 ++--------------- .../rcutorture/configs/lock/ver_functions.sh | 17 ++--------------- .../rcutorture/configs/rcu/ver_functions.sh | 17 ++--------------- .../configs/rcuperf/ver_functions.sh | 17 ++--------------- 22 files changed, 47 insertions(+), 314 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh index 43540f1828cc9..2deea2169fc2d 100755 --- a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh +++ b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Extract the number of CPUs expected from the specified Kconfig-file # fragment by checking CONFIG_SMP and CONFIG_NR_CPUS. If the specified @@ -7,23 +8,9 @@ # # Usage: configNR_CPUS.sh config-frag # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney cf=$1 if test ! -r $cf diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh index ef7fcbac3d421..90016c359e839 100755 --- a/tools/testing/selftests/rcutorture/bin/config_override.sh +++ b/tools/testing/selftests/rcutorture/bin/config_override.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # config_override.sh base override # @@ -6,23 +7,9 @@ # that conflict with any in override, concatenating what remains and # sending the result to standard output. 
# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2017 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney base=$1 if test -r $base diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 197deece7c7c1..5b25524d03660 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -1,23 +1,11 @@ #!/bin/bash -# Usage: configcheck.sh .config .config-template -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. +# SPDX-License-Identifier: GPL-2.0+ # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. +# Usage: configcheck.sh .config .config-template # # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/abat-chk-config.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index 65541c21a5444..40359486b3a80 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Usage: configinit.sh config-spec-file build-output-dir results-dir # @@ -14,23 +15,9 @@ # for example, "O=/tmp/foo". If this argument is omitted, the .config # file will be generated directly in the current directory. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. 
McKenney T=${TMPDIR-/tmp}/configinit.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index bb99cde3f5f97..ff71022127031 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Get an estimate of how CPU-hoggy to be. # # Usage: cpus2use.sh # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney ncpus=`grep '^processor' /proc/cpuinfo | wc -l` idlecpus=`mpstat | tail -1 | \ diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 65f6655026f0e..6bcb8b5b2ff22 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # bootparam_hotplug_cpu bootparam-string # diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index 3633828375e3f..435b609339854 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Alternate sleeping and spinning on randomly selected CPUs. The purpose # of this script is to inflict random OS jitter on a concurrently running @@ -11,23 +12,9 @@ # sleepmax: Maximum microseconds to sleep, defaults to one second. # spinmax: Maximum microseconds to spin, defaults to one millisecond. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2016 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney me=$(($1 * 1000)) duration=$2 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 9115fcdb5617c..c27a0bbb9c02e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Build a kvm-ready Linux kernel from the tree in the current directory. # # Usage: kvm-build.sh config-template build-dir resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney config_template=${1} if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 98f650c9bf54a..8426fe1f15eeb 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ # # Invoke a text editor on all console.log files for all runs with diagnostics, # that is, on all such files having a console.log.diags counterpart. @@ -10,6 +11,10 @@ # # The "directory" above should end with the date/time directory, for example, # "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27". +# +# Copyright (C) IBM Corporation, 2018 +# +# Author: Paul E. McKenney rundir="${1}" if test -z "$rundir" -o ! -d "$rundir" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh index 2de92f43ee8c2..f3a7a5e2b89d4 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for locktorture progress. # # Usage: kvm-recheck-lock.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2014 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" if test -d "$i" -a -r "$i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 0fa8a61ccb7b2..2a7f3f4756a74 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for rcutorture progress. # # Usage: kvm-recheck-rcu.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2014 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" if test -d "$i" -a -r "$i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh index 8948f7926b21f..7d3c2be66c644 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for rcuperf performance measurements, # looking for ftrace data. Exits with 0 if data was found, analyzed, and @@ -7,23 +8,9 @@ # # Usage: kvm-recheck-rcuperf-ftrace.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2016 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" . 
functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh index ccebf772fa1e5..db0375a57f281 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh @@ -1,26 +1,13 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Analyze a given results directory for rcuperf performance measurements. # # Usage: kvm-recheck-rcuperf.sh resdir # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2016 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney i="$1" if test -d "$i" -a -r "$i" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index c9bab57a77eba..2adde6aaafdba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Given the results directories for previous KVM-based torture runs, # check the build and console output for errors. Given a directory @@ -6,23 +7,9 @@ # # Usage: kvm-recheck.sh resdir ... # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH . functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 58ca758a5786f..0eb1ec16d78a1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Run a kvm-based test of the specified tree on the specified configs. # Fully automated run and error checking, no graphics console. @@ -20,23 +21,9 @@ # # More sophisticated argument parsing is clearly needed. 
# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 19864f1cb27a4..8f1e337b9b54e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Run a series of tests under KVM. By default, this series is specified # by the relevant CFLIST file, but can be overridden by the --configs @@ -6,23 +7,9 @@ # # Usage: kvm.sh [ options ] # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney scriptname=$0 args="$*" diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 83552bb007b42..6fa9bd1ddc094 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -1,21 +1,8 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Create an initrd directory if one does not already exist. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. 
-# # Copyright (C) IBM Corporation, 2013 # # Author: Connor Shu diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 24fe5f822b28d..0701b3bf6adea 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Check the build output from an rcutorture run for goodness. # The "file" is a pathname on the local system, and "title" is @@ -8,23 +9,9 @@ # # Usage: parse-build.sh file title # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney F=$1 title=$2 diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 84933f6aed778..4508373a922fe 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Check the console output from an rcutorture run for oopses. # The "file" is a pathname on the local system, and "title" is @@ -6,23 +7,9 @@ # # Usage: parse-console.sh file title # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2011 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney T=${TMPDIR-/tmp}/parse-console.sh.$$ file="$1" diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh index 80eb646e13199..d3e4b2971f922 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Kernel-version-dependent shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2014 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # locktorture_param_onoff bootparam-string config-file # diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh index 7bab8246392bb..effa415f9b928 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Kernel-version-dependent shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2013 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # rcutorture_param_n_barrier_cbs bootparam-string # diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh index d36b8fd6f0fc9..777d5b0c190fb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh @@ -1,24 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ # # Torture-suite-dependent shell functions for the rest of the scripts. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# # Copyright (C) IBM Corporation, 2015 # -# Authors: Paul E. McKenney +# Authors: Paul E. McKenney # per_version_boot_params bootparam-string config-file seconds # -- GitLab From a3b0e1e59ef1757488ef05b66bc376eaf7b06ada Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 28 Feb 2019 15:06:13 -0800 Subject: [PATCH 0230/1861] rcutorture: Make rcutorture_extend_mask() comment match the code The code rarely uses more than one type of RCU read-side protection, as is desired given that we need some reasonable probability of preempting RCU read-side critical sections, which cannot happen with multiple types of protection. This commit therefore adjusts the comment to match the code. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index f14d1b18a74fc..2453229ba15a7 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1160,7 +1160,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) unsigned long randmask2 = randmask1 >> 3; WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); - /* Most of the time lots of bits, half the time only one bit. */ + /* Mostly only one bit (need preemption!), sometimes lots of bits. */ if (!(randmask1 & 0x7)) mask = mask & randmask2; else -- GitLab From f47cb1bb0da23162f4c17e0c0023df4889ecb492 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Feb 2019 15:59:43 -0800 Subject: [PATCH 0231/1861] rcutorture: Remove ->ext_irq_conflict field Back when there was a separate RCU-bh flavor, the ->ext_irq_conflict field was used to prevent executing local_bh_enable() while interrupts were disabled. However, there is no longer an RCU-bh flavor, so this commit removes the no-longer-needed ->ext_irq_conflict field. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 2453229ba15a7..21ab3c7eb221d 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -299,7 +299,6 @@ struct rcu_torture_ops { int irq_capable; int can_boost; int extendables; - int ext_irq_conflict; const char *name; }; @@ -1170,10 +1169,6 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) || (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH)))) mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; - if ((mask & RCUTORTURE_RDR_IRQ) && - !(mask & cur_ops->ext_irq_conflict) && - (oldmask & cur_ops->ext_irq_conflict)) - mask |= cur_ops->ext_irq_conflict; /* Or if readers object. */ return mask ?: RCUTORTURE_RDR_RCU; } -- GitLab From d44ac1bebc47e46d62019808a893582f56496a98 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Sat, 9 Mar 2019 00:40:45 +0530 Subject: [PATCH 0232/1861] rcutorture: Fix expected forward progress duration in OOM notifier The rcutorture_oom_notify() function has a misplaced close parenthesis that results in increasingly long delays in rcu_fwd_progress_check()'s checking for various RCU forward-progress problems: the buggy expression computed 1 + jiffies - rcu_fwd_startat/2 instead of the intended 1 + (jiffies - rcu_fwd_startat)/2, so the duration grew with the raw jiffies counter rather than with the elapsed test time. This commit therefore puts the parenthesis in the right place. Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E.
McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 21ab3c7eb221d..b42682b94cb73 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1843,7 +1843,7 @@ static int rcutorture_oom_notify(struct notifier_block *self, WARN(1, "%s invoked upon OOM during forward-progress testing.\n", __func__); rcu_torture_fwd_cb_hist(); - rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat) / 2)); + rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat)) / 2); WRITE_ONCE(rcu_fwd_emergency_stop, true); smp_mb(); /* Emergency stop before free and wait to avoid hangs. */ pr_info("%s: Freed %lu RCU callbacks.\n", -- GitLab From b813afae7ab6a5e91b4e16cc567331d9c2ae1f04 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 09:27:28 -0700 Subject: [PATCH 0233/1861] rcutorture: Fix cleanup path for invalid torture_type strings If the specified rcutorture.torture_type is not in the rcu_torture_init() function's torture_ops[] array, rcutorture prints some console messages and then invokes rcu_torture_cleanup() to set state so that a future torture test can run. However, rcu_torture_cleanup() also attempts to end the test that didn't actually start, and in doing so relies on the value of cur_ops, a value that is not particularly relevant in this case. This can result in confusing output or even follow-on failures due to attempts to use facilities that have not been properly initialized. This commit therefore sets the value of cur_ops to NULL in this case and inserts a check near the beginning of rcu_torture_cleanup(), thus avoiding relying on an irrelevant cur_ops value. Reported-by: kernel test robot Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index b42682b94cb73..e3c0f57ab0aaf 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2089,6 +2089,10 @@ rcu_torture_cleanup(void) cur_ops->cb_barrier(); return; } + if (!cur_ops) { + torture_cleanup_end(); + return; + } rcu_torture_barrier_cleanup(); torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task); @@ -2262,6 +2266,7 @@ rcu_torture_init(void) pr_cont("\n"); WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST)); firsterr = -EINVAL; + cur_ops = NULL; goto unwind; } if (cur_ops->fqs == NULL && fqs_duration != 0) { -- GitLab From ad092c027713a68a34168942a5ef422e42e039f4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 10:26:41 -0700 Subject: [PATCH 0234/1861] rcuperf: Fix cleanup path for invalid perf_type strings If the specified rcuperf.perf_type is not in the rcu_perf_init() function's perf_ops[] array, rcuperf prints some console messages and then invokes rcu_perf_cleanup() to set state so that a future torture test can run. However, rcu_perf_cleanup() also attempts to end the test that didn't actually start, and in doing so relies on the value of cur_ops, a value that is not particularly relevant in this case. This can result in confusing output or even follow-on failures due to attempts to use facilities that have not been properly initialized. This commit therefore sets the value of cur_ops to NULL in this case and inserts a check near the beginning of rcu_perf_cleanup(), thus avoiding relying on an irrelevant cur_ops value. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/rcuperf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index c297611528744..7a6890b23c5f5 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -494,6 +494,10 @@ rcu_perf_cleanup(void) if (torture_cleanup_begin()) return; + if (!cur_ops) { + torture_cleanup_end(); + return; + } if (reader_tasks) { for (i = 0; i < nrealreaders; i++) @@ -614,6 +618,7 @@ rcu_perf_init(void) pr_cont("\n"); WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST)); firsterr = -EINVAL; + cur_ops = NULL; goto unwind; } if (cur_ops->init) -- GitLab From a9d6938ddb7f892552013b93e4842fc1a538628d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 13:30:01 -0700 Subject: [PATCH 0235/1861] locktorture: NULL cxt.lwsa and cxt.lrsa to allow bad-arg detection Currently, lock_torture_cleanup() uses the values of cxt.lwsa and cxt.lrsa to detect bad parameters that prevented locktorture from initializing, let alone running. In this case, lock_torture_cleanup() does no cleanup aside from invoking torture_cleanup_begin() and torture_cleanup_end(), as required to permit future torture tests to run. However, this heuristic fails if the run with bad parameters was preceded by a previous run that actually ran: In this case, both cxt.lwsa and cxt.lrsa will remain non-zero, which means that the current lock_torture_cleanup() invocation will be unable to detect the fact that it should skip cleanup, which can result in charming outcomes such as double frees. This commit therefore NULLs out both cxt.lwsa and cxt.lrsa at the end of any run that actually ran. Signed-off-by: Paul E. McKenney Cc: Davidlohr Bueso Cc: Josh Triplett --- kernel/locking/locktorture.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index ad40a2617063c..80a463d31a8d9 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -829,7 +829,9 @@ static void lock_torture_cleanup(void) "End of test: SUCCESS"); kfree(cxt.lwsa); + cxt.lwsa = NULL; kfree(cxt.lrsa); + cxt.lrsa = NULL; end: torture_cleanup_end(); -- GitLab From 164a4daaeaecb69b0373f20eead9626026352963 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Mar 2019 13:09:33 -0700 Subject: [PATCH 0236/1861] torture: Suppress false-positive CONFIG_INITRAMFS_SOURCE complaint The scripting must supply the CONFIG_INITRAMFS_SOURCE Kconfig option so that kbuild can find the desired initrd, but the configcheck.sh script gets confused by this option because it takes a string instead of the expected y/n/m. This causes configcheck.sh to complain about CONFIG_INITRAMFS_SOURCE in the torture-test output (though not in the summary). As more people use rcutorture, the resulting confusion is an increasing concern. This commit therefore suppresses this false-positive warning by filtering CONFIG_INITRAMFS_SOURCE from within the configcheck.sh script. Reported-by: Joel Fernandes Signed-off-by: Paul E.
McKenney --- tools/testing/selftests/rcutorture/bin/configcheck.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index 5b25524d03660..31584cee84d71 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -14,6 +14,7 @@ mkdir $T cat $1 > $T/.config cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' | +grep -v '^CONFIG_INITRAMFS_SOURCE' | awk ' { print "if grep -q \"" $0 "\" < '"$T/.config"'"; -- GitLab From b3ccbbce1e455b8454d3935eb9ae0a5f18939e24 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 25 Feb 2019 11:20:05 -0800 Subject: [PATCH 0237/1861] i40e: fix i40e_ptp_adjtime when given a negative delta Commit 0ac30ce43323 ("i40e: fix up 32 bit timespec references", 2017-07-26) claims to be cleaning up references to 32-bit timespecs. The actual contents of the commit make no sense, as it converts a call to timespec64_add into timespec64_add_ns. This would seem OK, if (a) the change was documented in the commit message, and (b) timespec64_add_ns supported negative numbers. timespec64_add_ns doesn't work with signed deltas, because the implementation is based around iter_div_u64_rem. This change resulted in a regression where i40e_ptp_adjtime would interpret small negative adjustments as large positive additions, resulting in incorrect behavior. (For example, since timespec64_add_ns() takes an unsigned nanosecond count, a delta of -10 ns becomes an addition of nearly 2^64 ns, pushing the clock roughly 585 years forward.) This commit doesn't appear to fix anything, is not well explained, and introduces a bug, so let's just revert it. Reverts: 0ac30ce43323 ("i40e: fix up 32 bit timespec references", 2017-07-26) Signed-off-by: Jacob Keller Reviewed-by: Arnd Bergmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ptp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 5fb4353c742b9..31575c0bb884f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -146,12 +146,13 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); - struct timespec64 now; + struct timespec64 now, then; + then = ns_to_timespec64(delta); mutex_lock(&pf->tmreg_lock); i40e_ptp_read(pf, &now, NULL); - timespec64_add_ns(&now, delta); + now = timespec64_add(now, then); i40e_ptp_write(pf, (const struct timespec64 *)&now); mutex_unlock(&pf->tmreg_lock); -- GitLab From dabb8338be533c18f50255cf39ff4f66d4dabdbe Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sat, 2 Mar 2019 11:01:17 -0500 Subject: [PATCH 0238/1861] igb: Fix WARN_ONCE on runtime suspend The runtime_suspend device callbacks are not supposed to save configuration state or change the power state. Commit fb29f76cc566 ("igb: Fix an issue that PME is not enabled during runtime suspend") changed the driver to not save configuration state during runtime suspend; however, the driver callback still put the device into a low-power state. This causes a warning in the PCI PM core and results in pci_pm_runtime_suspend not calling pci_save_state or pci_finish_runtime_suspend. Fix this by not changing the power state either, leaving that to the PCI PM core, and make the same change for the suspend callback as well. Also move a couple of defines into the appropriate header file instead of inlining them in the .c file.
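As an editorial illustration of the resulting pattern (a minimal sketch, not code from the patch; foo_runtime_suspend() and foo_shutdown_hw() are hypothetical stand-ins for a driver's callback and its internal quiesce helper, __igb_shutdown() in this driver), a runtime-suspend callback in this style only quiesces the hardware and reports wake capability:

/*
 * Sketch only: quiesce the device and arm wake-up, but do not call
 * pci_save_state() or pci_set_power_state(); the PCI PM core handles
 * config-space saving and the D-state transition after this callback
 * returns.
 */
static int __maybe_unused foo_runtime_suspend(struct device *dev)
{
	return foo_shutdown_hw(to_pci_dev(dev), NULL, 1);
}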
Fixes: fb29f76cc566 ("igb: Fix an issue that PME is not enabled during runtime suspend") Signed-off-by: Arvind Sankar Reviewed-by: Kai-Heng Feng Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/igb/e1000_defines.h | 2 + drivers/net/ethernet/intel/igb/igb_main.c | 57 +++---------------- 2 files changed, 10 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 01fcfc6f34151..d2e2c50ce2579 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -194,6 +194,8 @@ /* enable link status from external LINK_0 and LINK_1 pins */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */ #define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ #define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 69b230c53fed5..3269d8e94744f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8740,9 +8740,7 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl, status; u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif + bool wake; rtnl_lock(); netif_device_detach(netdev); @@ -8755,14 +8753,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, igb_clear_interrupt_scheme(adapter); rtnl_unlock(); -#ifdef CONFIG_PM - if (!runtime) { - retval = pci_save_state(pdev); - if (retval) - return retval; - } -#endif - status = rd32(E1000_STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -8779,10 +8769,6 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, } ctrl = rd32(E1000_CTRL); - /* advertise wake from D3Cold */ - #define E1000_CTRL_ADVD3WUC 0x00100000 - /* phy power management enable */ - #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ctrl |= E1000_CTRL_ADVD3WUC; wr32(E1000_CTRL, ctrl); @@ -8796,12 +8782,15 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, wr32(E1000_WUFC, 0); } - *enable_wake = wufc || adapter->en_mng_pt; - if (!*enable_wake) + wake = wufc || adapter->en_mng_pt; + if (!wake) igb_power_down_link(adapter); else igb_power_up_link(adapter); + if (enable_wake) + *enable_wake = wake; + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. 
*/ @@ -8844,22 +8833,7 @@ static void igb_deliver_wake_packet(struct net_device *netdev) static int __maybe_unused igb_suspend(struct device *dev) { - int retval; - bool wake; - struct pci_dev *pdev = to_pci_dev(dev); - - retval = __igb_shutdown(pdev, &wake, 0); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 0); } static int __maybe_unused igb_resume(struct device *dev) @@ -8930,22 +8904,7 @@ static int __maybe_unused igb_runtime_idle(struct device *dev) static int __maybe_unused igb_runtime_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - int retval; - bool wake; - - retval = __igb_shutdown(pdev, &wake, 1); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } - - return 0; + return __igb_shutdown(to_pci_dev(dev), NULL, 1); } static int __maybe_unused igb_runtime_resume(struct device *dev) -- GitLab From 6e846239e5487cbb89ac8192d5f11437d010130e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:09 +0000 Subject: [PATCH 0239/1861] EDAC/amd64: Add Family 17h Model 30h PCI IDs Add the new Family 17h Model 30h PCI IDs to the AMD64 EDAC module. This also fixes a probe failure that appeared when some other PCI IDs for Family 17h Model 30h were added to the AMD NB code. Fixes: be3518a16ef2 (x86/amd_nb: Add PCI device IDs for family 17h, model 30h) Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-1-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 13 +++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6ea98575a402e..98e8da9d9f5b8 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2211,6 +2211,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M30H_CPUS] = { + .ctl_name = "F17h_M30h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3203,6 +3212,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M10H_CPUS]; pvt->ops = &family_types[F17_M10H_CPUS].ops; break; + } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { + fam_type = &family_types[F17_M30H_CPUS]; + pvt->ops = &family_types[F17_M30H_CPUS].ops; + break; } /* fall through */ case 0x18: diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 4242f8e39c18f..de8dbb0b42b55 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -117,6 +117,8 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 /* * Function 1 - Address Map @@ -284,6 +286,7 @@ enum amd_families { F16_M30H_CPUS, F17_CPUS, F17_M10H_CPUS, + F17_M30H_CPUS, NUM_FAMILIES, }; -- GitLab From 4d30d2bc3c23e63c2608bc5b03b0960490d5b740 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 
2019 15:36:10 +0000 Subject: [PATCH 0240/1861] EDAC/amd64: Use a macro for iterating over Unified Memory Controllers Define and use a macro for looping over the number of Unified Memory Controllers. No functional change. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-2-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 98e8da9d9f5b8..e4fd459d807a0 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -449,6 +449,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, #define for_each_chip_select_mask(i, dct, pvt) \ for (i = 0; i < pvt->csels[dct].m_cnt; i++) +#define for_each_umc(i) \ + for (i = 0; i < NUM_UMCS; i++) + /* * @input_addr is an InputAddr associated with the node given by mci. Return the * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr). @@ -722,7 +725,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt) if (pvt->umc) { u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) continue; @@ -811,7 +814,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) struct amd64_umc *umc; u32 i, tmp, umc_base; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { umc_base = get_umc_base(i); umc = &pvt->umc[i]; @@ -1388,7 +1391,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt) int i, channels = 0; /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ - for (i = 0; i < NUM_UMCS; i++) + for_each_umc(i) channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); amd64_info("MCT channel count: %d\n", channels); @@ -2612,7 +2615,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) if (pvt->umc) { u8 i; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { /* Check enabled channels only: */ if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) && (pvt->umc[i].ecc_ctrl & BIT(7))) { @@ -2648,7 +2651,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) u32 i, umc_base; /* Read registers from each UMC */ - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { umc_base = get_umc_base(i); umc = &pvt->umc[i]; @@ -3061,7 +3064,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) if (boot_cpu_data.x86 >= 0x17) { u8 umc_en_mask = 0, ecc_en_mask = 0; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { u32 base = get_umc_base(i); /* Only check enabled UMCs. */ @@ -3114,7 +3117,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { u8 i, ecc_en = 1, cpk_en = 1; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); -- GitLab From bdcee7747f5c490297665af0e1e0fbeb4368804d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:10 +0000 Subject: [PATCH 0241/1861] EDAC/amd64: Support more than two Unified Memory Controllers The first few models of Family 17h all had 2 Unified Memory Controllers per Die, so this was treated as a fixed value. However, future systems may have more Unified Memory Controllers per Die. 
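(As a worked example of the instance-ID layout described in the next paragraph, using a hypothetical value not taken from the patch: an IPID whose low 32 bits are 0x250f00 encodes channel 2, since 0x250f00 >> 20 == 0x2.)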
Related to this, the channel number and base address of a Unified Memory Controller were found by matching on fixed, known values. However, current and future systems follow this pattern for the channel number and base address of a Unified Memory Controller: 0xYXXXXX, where Y is the channel number. So matching on hardcoded values is not necessary. Set the number of Unified Memory Controllers at driver init time based on the family/model. Also, update the functions that find the channel number and base address of a Unified Memory Controller to support more than two. [ bp: Move num_umcs into the .c file and simplify comment. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-3-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 47 +++++++++++++++++++++++++-------------- drivers/edac/amd64_edac.h | 6 ++--- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e4fd459d807a0..81dca957fce06 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -18,6 +18,9 @@ static struct msr __percpu *msrs; /* Per-node stuff */ static struct ecc_settings **ecc_stngs; +/* Number of Unified Memory Controllers */ +static u8 num_umcs; + /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -450,7 +453,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < NUM_UMCS; i++) + for (i = 0; i < num_umcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. Return the @@ -2476,18 +2479,14 @@ static inline void decode_bus_error(int node_id, struct mce *m) * To find the UMC channel represented by this bank we need to match on its * instance_id. The instance_id of a bank is held in the lower 32 bits of its * IPID. + * + * Currently, we can derive the channel number by looking at the 6th nibble in + * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel + * number. */ -static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m) +static int find_umc_channel(struct mce *m) { - u32 umc_instance_id[] = {0x50f00, 0x150f00}; - u32 instance_id = m->ipid & GENMASK(31, 0); - int i, channel = -1; - - for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) - if (umc_instance_id[i] == instance_id) - channel = i; - - return channel; + return (m->ipid & GENMASK(31, 0)) >> 20; } static void decode_umc_error(int node_id, struct mce *m) @@ -2509,11 +2508,7 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(pvt, m); - if (err.channel < 0) { - err.err_code = ERR_CHANNEL; - goto log_error; - } + err.channel = find_umc_channel(m); if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { err.err_code = ERR_NORM_ADDR; @@ -3252,6 +3247,22 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; +/* Set the number of Unified Memory Controllers in the system. 
*/ +static void compute_num_umcs(void) +{ + u8 model = boot_cpu_data.x86_model; + + if (boot_cpu_data.x86 < 0x17) + return; + + if (model >= 0x30 && model <= 0x3f) + num_umcs = 8; + else + num_umcs = 2; + + edac_dbg(1, "Number of UMCs: %x", num_umcs); +} + static int init_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; @@ -3276,7 +3287,7 @@ static int init_one_instance(unsigned int nid) goto err_free; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL); + pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); if (!pvt->umc) { ret = -ENOMEM; goto err_free; @@ -3497,6 +3508,8 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; + compute_num_umcs(); + for (i = 0; i < amd_nb_num(); i++) { err = probe_one_instance(i); if (err) { diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index de8dbb0b42b55..88ca6dc2d5df8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -274,8 +274,6 @@ #define UMC_SDP_INIT BIT(31) -#define NUM_UMCS 2 - enum amd_families { K8_CPUS = 0, F10_CPUS, @@ -399,8 +397,8 @@ struct err_info { static inline u32 get_umc_base(u8 channel) { - /* ch0: 0x50000, ch1: 0x150000 */ - return 0x50000 + (!!channel << 20); + /* chY: 0xY50000 */ + return 0x50000 + (channel << 20); } static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) -- GitLab From 869adc4316ea348e3c52af2494d9b1f6bd68abbd Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 25 Mar 2019 20:33:30 +0000 Subject: [PATCH 0242/1861] EDAC/amd64: Set maximum channel layer size depending on family The AMD64 EDAC module currently hardcodes the EDAC channel layer size count to two. Future AMD systems may have more channels than this. Set the EDAC channel layer size equal to the maximum number of channels possible for the system. On Family 17h and later, this is set in the num_umcs variable. Older systems will continue to use two as the default. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190325203319.7603-1-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 81dca957fce06..54acd3a7acdc1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3326,8 +3326,14 @@ static int init_one_instance(unsigned int nid) * Always allocate two channels since we can have setups with DIMMs on * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. + * + * On Fam17h+, the number of controllers may be greater than two. So set + * the size equal to the maximum number of UMCs. */ - layers[1].size = 2; + if (pvt->fam >= 0x17) + layers[1].size = num_umcs; + else + layers[1].size = 2; layers[1].is_virt_csrow = false; mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); -- GitLab From 7835961d377b75ab9ae77f715e378fcb72508306 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:11 +0000 Subject: [PATCH 0243/1861] EDAC/amd64: Recognize x16 symbol size Future AMD systems may support x16 symbol sizes. Recognize if a system is using x16 symbol size. Also, simplify the print statement. Note that a x16 syndrome vector table is not necessary like with x4 or x8 syndromes. 
This is because systems that support x16 symbol sizes are SMCA systems and in that case, the syndrome can be directly extracted from the MCA_SYND[Syndrome] field. [ bp: massage. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-4-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 21 ++++++++++----------- drivers/edac/amd64_edac.h | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 54acd3a7acdc1..1457919cb3b59 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -900,8 +900,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); - amd64_info("using %s syndromes.\n", - ((pvt->ecc_sym_sz == 8) ? "x8" : "x4")); + amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } /* @@ -2612,17 +2611,17 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) for_each_umc(i) { /* Check enabled channels only: */ - if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) && - (pvt->umc[i].ecc_ctrl & BIT(7))) { - pvt->ecc_sym_sz = 8; - break; + if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { + if (pvt->umc[i].ecc_ctrl & BIT(9)) { + pvt->ecc_sym_sz = 16; + return; + } else if (pvt->umc[i].ecc_ctrl & BIT(7)) { + pvt->ecc_sym_sz = 8; + return; + } } } - - return; - } - - if (pvt->fam >= 0x10) { + } else if (pvt->fam >= 0x10) { u32 tmp; amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 88ca6dc2d5df8..8f66472f7adc2 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -364,7 +364,7 @@ struct amd64_pvt { u32 dct_sel_hi; /* DRAM Controller Select High */ u32 online_spare; /* On-Line spare Reg */ - /* x4 or x8 syndromes in use */ + /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; /* place to store error injection parameters prior to issue */ -- GitLab From 0a227af521d6df5286550b62f4b591417170b4ea Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:11 +0000 Subject: [PATCH 0244/1861] EDAC/amd64: Support more than two controllers for chip select handling The struct chip_select array that's used for saving chip select bases and masks is fixed at length of two. There should be one struct chip_select for each controller, so this array should be increased to support systems that may have more than two controllers. Increase the size of the struct chip_select array to eight, which is the largest number of controllers per die currently supported on AMD systems. Also, carve out the Family 17h+ reading of the bases/masks into a separate function. This effectively reverts the original bases/masks reading code to before Family 17h support was added. 
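To make the new per-UMC layout concrete, here is a self-contained sketch of the register addressing that the carved-out read_umc_base_mask() performs. This is an illustration under stated assumptions: the UMCCH_* offsets below are placeholders rather than the header's real values, while the 0xY50000 channel pattern comes from the earlier patch in this series.

	#include <stdio.h>

	/* placeholder offsets; the real UMCCH_* values live in amd64_edac.h */
	#define UMCCH_BASE_ADDR 0x00u
	#define UMCCH_ADDR_MASK 0x20u

	/* chY: 0xY50000, per the get_umc_base() change earlier in this series */
	static unsigned int get_umc_base(unsigned int channel)
	{
		return 0x50000u + (channel << 20);
	}

	int main(void)
	{
		for (unsigned int umc = 0; umc < 2; umc++) {
			for (unsigned int cs = 0; cs < 4; cs++) {
				unsigned int base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR + cs * 4;
				/* one mask register is shared by each pair of chip selects */
				unsigned int mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK + (cs >> 1) * 4;

				printf("umc%u cs%u: base reg 0x%06x, mask reg 0x%06x\n",
				       umc, cs, base_reg, mask_reg);
			}
		}
		return 0;
	}

The cs >> 1 indexing mirrors the diff below: a mask register serves a pair of chip selects, which is why the mask count is half the base count.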
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-5-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 113 ++++++++++++++++++++------------------ drivers/edac/amd64_edac.h | 5 +- 2 files changed, 62 insertions(+), 56 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 1457919cb3b59..d89998dcf7f56 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -914,89 +914,94 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; + } else if (pvt->fam >= 0x17) { + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 8; + pvt->csels[umc].m_cnt = 4; + } + } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void read_umc_base_mask(struct amd64_pvt *pvt) +{ + int cs, umc; + + for_each_umc(umc) { + u32 umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; + u32 umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + + for_each_chip_select(cs, 0, pvt) { + u32 *base = &pvt->csels[umc].csbases[cs]; + u32 *mask = &pvt->csels[umc].csmasks[cs]; + + u32 base_reg = umc_base_reg + (cs * 4); + u32 mask_reg = umc_mask_reg + ((cs >> 1) * 4); + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + } + } +} + /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ static void read_dct_base_mask(struct amd64_pvt *pvt) { - int base_reg0, base_reg1, mask_reg0, mask_reg1, cs; + int cs; prep_chip_selects(pvt); - if (pvt->umc) { - base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR; - base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR; - mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK; - mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK; - } else { - base_reg0 = DCSB0; - base_reg1 = DCSB1; - mask_reg0 = DCSM0; - mask_reg1 = DCSM1; - } + if (pvt->umc) + return read_umc_base_mask(pvt); for_each_chip_select(cs, 0, pvt) { - int reg0 = base_reg0 + (cs * 4); - int reg1 = base_reg1 + (cs * 4); + int reg0 = DCSB0 + (cs * 4); + int reg1 = DCSB1 + (cs * 4); u32 *base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n", - cs, *base0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); - if (!amd_smn_read(pvt->mc_node_id, reg1, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n", - cs, *base1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); - - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, (pvt->fam == 0x10) ? 
reg1 + : reg0); } for_each_chip_select_mask(cs, 0, pvt) { - int reg0 = mask_reg0 + (cs * 4); - int reg1 = mask_reg1 + (cs * 4); + int reg0 = DCSM0 + (cs * 4); + int reg1 = DCSM1 + (cs * 4); u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n", - cs, *mask0, reg0); - - if (!amd_smn_read(pvt->mc_node_id, reg1, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n", - cs, *mask1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); } } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8f66472f7adc2..4dce6a2ac75f9 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,6 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 +#define NUM_CONTROLLERS 8 #define ON true #define OFF false @@ -351,8 +352,8 @@ struct amd64_pvt { u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT */ - struct chip_select csels[2]; + /* one for each DCT/UMC */ + struct chip_select csels[NUM_CONTROLLERS]; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; -- GitLab From fc00c6a416381010c4a721a4142ddd0260d68f20 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:12 +0000 Subject: [PATCH 0245/1861] EDAC/amd64: Adjust printed chip select sizes when interleaved AMD systems may support chip select interleaving. However, on family 17h+ this was not taken into account when printing the chip select sizes. Add support to detect if chip selects are interleaved on family 17h+, and adjust the sizes accordingly. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-6-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d89998dcf7f56..4c0239aeff2fc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -787,6 +787,22 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) (dclr & BIT(15)) ? "yes" : "no"); } +/* + * The Address Mask should be a contiguous set of bits in the non-interleaved + * case. So to check for CS interleaving, find the most- and least-significant + * bits of the mask, generate a contiguous bitmask, and compare the two. 
+ */ +static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs) +{ + u32 mask = pvt->csels[ctrl].csmasks[cs >> 1]; + u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1; + u32 test_mask = GENMASK(msb, lsb); + + edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask); + + return mask ^ test_mask; +} + static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1, cs0, cs1; @@ -803,8 +819,19 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) size1 = 0; cs1 = dimm * 2 + 1; - if (csrow_enabled(cs1, ctrl, pvt)) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); + if (csrow_enabled(cs1, ctrl, pvt)) { + /* + * CS interleaving is only supported if both CSes have + * the same amount of memory. Because they are + * interleaved, it will look like both CSes have the + * full amount of memory. Save the size for both as + * half the amount we found on CS0, if interleaved. + */ + if (f17_cs_interleaved(pvt, ctrl, cs1)) + size1 = size0 = (size0 >> 1); + else + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); + } amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, -- GitLab From 7ec52b9df7d7472240fa96223185894b1897aeb0 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 15 Mar 2019 09:45:15 +0100 Subject: [PATCH 0246/1861] ixgbe: fix mdio bus registration The ixgbe ignores errors returned from mdiobus_register() and leaves adapter->mii_bus non-NULL and MDIO bus state as MDIOBUS_ALLOCATED. This triggers a BUG from mdiobus_unregister() during ixgbe_remove() call. Fixes: 8fa10ef01260 ("ixgbe: register a mdiobus") Signed-off-by: Ivan Vecera Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index cc4907f9ff02c..2fb97967961c4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -905,13 +905,12 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) struct pci_dev *pdev = adapter->pdev; struct device *dev = &adapter->netdev->dev; struct mii_bus *bus; + int err = -ENODEV; - adapter->mii_bus = devm_mdiobus_alloc(dev); - if (!adapter->mii_bus) + bus = devm_mdiobus_alloc(dev); + if (!bus) return -ENOMEM; - bus = adapter->mii_bus; - switch (hw->device_id) { /* C3000 SoCs */ case IXGBE_DEV_ID_X550EM_A_KR: @@ -949,12 +948,15 @@ s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw) */ hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22; - return mdiobus_register(bus); + err = mdiobus_register(bus); + if (!err) { + adapter->mii_bus = bus; + return 0; + } ixgbe_no_mii_bus: devm_mdiobus_free(dev, bus); - adapter->mii_bus = NULL; - return -ENODEV; + return err; } /** -- GitLab From f669d24f3dd00beab452c0fc9257f6a942ffca9b Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 21 Mar 2019 13:45:35 +0100 Subject: [PATCH 0247/1861] i40e: fix WoL support check The current check for WoL on i40e is broken. Code comment says only magic packet is supported, so only check for that. 
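To see why the bitmask form is the right test, here is a small standalone model contrasting the removed expression with the new one; the WAKE_* bit values follow the uapi ethtool header, and the parentheses in old_rejects() only make the original operator precedence explicit:

	#include <assert.h>

	#define WAKE_MAGIC  (1 << 5)
	#define WAKE_FILTER (1 << 7)

	/* the removed check: '&&' and '|' mixed up, so any nonzero request fails */
	static int old_rejects(unsigned int wolopts)
	{
		return wolopts && ((wolopts != WAKE_MAGIC) | (wolopts != WAKE_FILTER));
	}

	/* the new check: reject exactly the requests asking for more than magic packet */
	static int new_rejects(unsigned int wolopts)
	{
		return wolopts & ~WAKE_MAGIC;
	}

	int main(void)
	{
		assert(old_rejects(WAKE_MAGIC));               /* the bug: plain magic packet was refused */
		assert(!new_rejects(0));                       /* disabling WoL is accepted */
		assert(!new_rejects(WAKE_MAGIC));              /* magic packet is accepted */
		assert(new_rejects(WAKE_MAGIC | WAKE_FILTER)); /* anything beyond magic is refused */
		return 0;
	}

Since wolopts can never equal both WAKE_MAGIC and WAKE_FILTER at once, the old expression reduced to wolopts != 0 and rejected every request, including the one supported mode.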
Fixes: 540a152da762 (i40e/ixgbe/igb: fail on new WoL flag setting WAKE_MAGICSECURE) Signed-off-by: Stefan Assmann Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4c885801fa269..7874d0ec7fb0e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2573,8 +2573,7 @@ static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return -EOPNOTSUPP; /* only magic packet is supported */ - if (wol->wolopts && (wol->wolopts != WAKE_MAGIC) - | (wol->wolopts != WAKE_FILTER)) + if (wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; /* is this a new value? */ -- GitLab From 01ca667133d019edc9f0a1f70a272447c84ec41f Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 21 Mar 2019 22:42:23 +0800 Subject: [PATCH 0248/1861] fm10k: Fix a potential NULL pointer dereference Syzkaller report this: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 4378 Comm: syz-executor.0 Tainted: G C 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__lock_acquire+0x95b/0x3200 kernel/locking/lockdep.c:3573 Code: 00 0f 85 28 1e 00 00 48 81 c4 08 01 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 4c 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 cc 24 00 00 49 81 7d 00 e0 de 03 a6 41 bc 00 00 RSP: 0018:ffff8881e3c07a40 EFLAGS: 00010002 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000010 RSI: 0000000000000000 RDI: 0000000000000080 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8881e3c07d98 R11: ffff8881c7f21f80 R12: 0000000000000001 R13: 0000000000000080 R14: 0000000000000000 R15: 0000000000000001 FS: 00007fce2252e700(0000) GS:ffff8881f2400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffc7eb0228 CR3: 00000001e5bea002 CR4: 00000000007606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: lock_acquire+0xff/0x2c0 kernel/locking/lockdep.c:4211 __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0xdf/0x1050 kernel/locking/mutex.c:1072 drain_workqueue+0x24/0x3f0 kernel/workqueue.c:2934 destroy_workqueue+0x23/0x630 kernel/workqueue.c:4319 __do_sys_delete_module kernel/module.c:1018 [inline] __se_sys_delete_module kernel/module.c:961 [inline] __x64_sys_delete_module+0x30c/0x480 kernel/module.c:961 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fce2252dc58 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000140 RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fce2252e6bc R13: 00000000004bcca9 R14: 00000000006f6b48 R15: 00000000ffffffff If alloc_workqueue fails, it should return -ENOMEM, otherwise may trigger this NULL 
pointer dereference while unloading drivers. Reported-by: Hulk Robot Fixes: 0a38c17a21a0 ("fm10k: Remove create_workqueue") Signed-off-by: Yue Haibing Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 5a0419421511f..ecef949f3baae 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -41,6 +41,8 @@ static int __init fm10k_init_module(void) /* create driver workqueue */ fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name); + if (!fm10k_workqueue) + return -ENOMEM; fm10k_dbg_init(); -- GitLab From 738c06d0e4562e0acf9f2c7438a22b2d5afc67aa Mon Sep 17 00:00:00 2001 From: KT Liao Date: Tue, 26 Mar 2019 17:28:32 -0700 Subject: [PATCH 0249/1861] Input: elan_i2c - add hardware ID for multiple Lenovo laptops There are many Lenovo laptops which need elan_i2c support; this patch adds the relevant IDs to the Elan driver so that touchpads are recognized. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 628ef617bb2f7..f9525d6f0bfe8 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1339,21 +1339,46 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0601", 0 }, { "ELAN0602", 0 }, + { "ELAN0603", 0 }, + { "ELAN0604", 0 }, { "ELAN0605", 0 }, + { "ELAN0606", 0 }, + { "ELAN0607", 0 }, { "ELAN0608", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, { "ELAN060C", 0 }, + { "ELAN060F", 0 }, + { "ELAN0610", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0615", 0 }, + { "ELAN0616", 0 }, { "ELAN0617", 0 }, { "ELAN0618", 0 }, + { "ELAN0619", 0 }, + { "ELAN061A", 0 }, + { "ELAN061B", 0 }, { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN061E", 0 }, + { "ELAN061F", 0 }, { "ELAN0620", 0 }, { "ELAN0621", 0 }, { "ELAN0622", 0 }, + { "ELAN0623", 0 }, + { "ELAN0624", 0 }, + { "ELAN0625", 0 }, + { "ELAN0626", 0 }, + { "ELAN0627", 0 }, + { "ELAN0628", 0 }, + { "ELAN0629", 0 }, + { "ELAN062A", 0 }, + { "ELAN062B", 0 }, + { "ELAN062C", 0 }, + { "ELAN062D", 0 }, + { "ELAN0631", 0 }, + { "ELAN0632", 0 }, { "ELAN1000", 0 }, { } }; -- GitLab From 07ba9e7be423423043c5090a2f395c0da26e1b3d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 11:18:17 -0800 Subject: [PATCH 0250/1861] Input: document meanings of KEY_SCREEN and KEY_ZOOM It is hard to say what KEY_SCREEN and KEY_ZOOM mean, but historically DVB folks have used them to indicate a switch to full screen mode. Later, they converged on using KEY_ZOOM to switch into full screen mode and KEY_SCREEN to control aspect ratio (see Documentation/media/uapi/rc/rc-tables.rst). 
Let's commit to these uses, and define: - KEY_FULL_SCREEN (and make KEY_ZOOM its alias) - KEY_ASPECT_RATIO (and make KEY_SCREEN its alias) Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ae366b87426ac..bc5054e51bef8 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -439,10 +439,12 @@ #define KEY_TITLE 0x171 #define KEY_SUBTITLE 0x172 #define KEY_ANGLE 0x173 -#define KEY_ZOOM 0x174 +#define KEY_FULL_SCREEN 0x174 /* AC View Toggle */ +#define KEY_ZOOM KEY_FULL_SCREEN #define KEY_MODE 0x175 #define KEY_KEYBOARD 0x176 -#define KEY_SCREEN 0x177 +#define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */ +#define KEY_SCREEN KEY_ASPECT_RATIO #define KEY_PC 0x178 /* Media Select Computer */ #define KEY_TV 0x179 /* Media Select TV */ #define KEY_TV2 0x17a /* Media Select Cable */ -- GitLab From 2291da5b4d305a6506d915d5bef85dedd7c94882 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:37:40 -0800 Subject: [PATCH 0251/1861] [media] doc-rst: switch to new names for Full Screen/Aspect keys We defined better names for keys to activate full screen mode or change aspect ratio (while keeping the existing keycodes to avoid breaking userspace), so let's use them in the document. Signed-off-by: Dmitry Torokhov --- Documentation/media/uapi/rc/rc-tables.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/media/uapi/rc/rc-tables.rst b/Documentation/media/uapi/rc/rc-tables.rst index c8ae9479f842e..57797e56f45e0 100644 --- a/Documentation/media/uapi/rc/rc-tables.rst +++ b/Documentation/media/uapi/rc/rc-tables.rst @@ -616,7 +616,7 @@ the remote via /dev/input/event devices. - .. row 78 - - ``KEY_SCREEN`` + - ``KEY_ASPECT_RATIO`` - Select screen aspect ratio @@ -624,7 +624,7 @@ the remote via /dev/input/event devices. - .. row 79 - - ``KEY_ZOOM`` + - ``KEY_FULL_SCREEN`` - Put device into zoom/full screen mode -- GitLab From f7b3d85aa7a31a90c3ef5b0992604db339a615ab Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:44:23 -0800 Subject: [PATCH 0252/1861] HID: input: fix mapping of aspect ratio key According to HUTRR37 usage 0x6d from the consumer usage page corresponds to an action that selects the next available supported aspect ratio option on a device which outputs or displays video. However, KEY_ZOOM means activate "Full Screen" mode, so KEY_ASPECT_RATIO should be used instead. 
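As a usage illustration, a minimal evdev reader shows how these codes surface to userspace; the event node path is hypothetical (pick the real one from /proc/bus/input/devices), and the KEY_* constants are the ones introduced in the uapi diff above:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <linux/input.h>	/* struct input_event, EV_KEY, KEY_* codes */

	int main(void)
	{
		/* hypothetical event node for the receiver or keyboard */
		int fd = open("/dev/input/event0", O_RDONLY);
		struct input_event ev;

		if (fd < 0)
			return 1;
		while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
			if (ev.type != EV_KEY || ev.value != 1)	/* key presses only */
				continue;
			if (ev.code == KEY_ASPECT_RATIO)	/* legacy alias: KEY_SCREEN */
				puts("cycle aspect ratio");
			else if (ev.code == KEY_FULL_SCREEN)	/* legacy alias: KEY_ZOOM */
				puts("toggle full screen");
		}
		close(fd);
		return 0;
	}

Because the new names are plain aliases for the old keycodes, existing binaries compiled against KEY_SCREEN/KEY_ZOOM keep working unchanged.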
Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d6fab57984874..def58c6aa8353 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -891,7 +891,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x06a: map_key_clear(KEY_GREEN); break; case 0x06b: map_key_clear(KEY_BLUE); break; case 0x06c: map_key_clear(KEY_YELLOW); break; - case 0x06d: map_key_clear(KEY_ZOOM); break; + case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break; case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break; case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break; -- GitLab From 96dd86871e1fffbc39e4fa61c9c75ec54ee9af0f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 13:59:08 -0800 Subject: [PATCH 0253/1861] HID: input: add mapping for Expose/Overview key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to HUTRR77 usage 0x29f from the consumer page is reserved for the Desktop application to present all running user’s application windows. Linux defines KEY_SCALE to request Compiz Scale (Expose) mode, so let's add the mapping. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index def58c6aa8353..5f800e7b04f2c 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1030,6 +1030,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break; case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break; + case 0x29f: map_key_clear(KEY_SCALE); break; + default: map_key_clear(KEY_UNKNOWN); } break; -- GitLab From 7975a1d6a7afeb3eb61c971a153d24dd8fa032f3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:05:52 -0800 Subject: [PATCH 0254/1861] HID: input: add mapping for keyboard Brightness Up/Down/Toggle keys According to HUTRR73 usages 0x79, 0x7a and 0x7c from the consumer page correspond to Brightness Up/Down/Toggle keys, so let's add the mappings. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5f800e7b04f2c..cebe8a8cce2e9 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -900,6 +900,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break; case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break; + case 0x079: map_key_clear(KEY_KBDILLUMUP); break; + case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break; + case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break; + case 0x082: map_key_clear(KEY_VIDEO_NEXT); break; case 0x083: map_key_clear(KEY_LAST); break; case 0x084: map_key_clear(KEY_ENTER); break; -- GitLab From afbbaa1bc0011d28f7604f9a7f0532f997c6f45e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:20:27 -0800 Subject: [PATCH 0255/1861] HID: input: add mapping for "Full Screen" key According to HUT 1.12 usage 0x232 from the consumer page is reserved for switching application between full screen and windowed mode, so let's add the mapping. 
Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index cebe8a8cce2e9..ecb1b6f268531 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1014,6 +1014,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x22d: map_key_clear(KEY_ZOOMIN); break; case 0x22e: map_key_clear(KEY_ZOOMOUT); break; case 0x22f: map_key_clear(KEY_ZOOMRESET); break; + case 0x232: map_key_clear(KEY_FULL_SCREEN); break; case 0x233: map_key_clear(KEY_SCROLLUP); break; case 0x234: map_key_clear(KEY_SCROLLDOWN); break; case 0x238: map_rel(REL_HWHEEL); break; -- GitLab From c01908a14bf735b871170092807c618bb9dae654 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Jan 2019 14:35:45 -0800 Subject: [PATCH 0256/1861] HID: input: add mapping for "Toggle Display" key According to HUT 1.12 usage 0xb5 from the generic desktop page is reserved for switching between external and internal display, so let's add the mapping. Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index ecb1b6f268531..da76358cde06f 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -677,6 +677,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel break; } + if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */ + switch (usage->hid & 0xf) { + case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break; + default: goto ignore; + } + break; + } + /* * Some lazy vendors declare 255 usages for System Control, * leading to the creation of ABS_X|Y axis and too many others. -- GitLab From dbee9c9c45846f003ec2f819710c2f4835630a6a Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Fri, 22 Mar 2019 14:37:04 +0800 Subject: [PATCH 0257/1861] riscv: fix accessing 8-byte variable from RV32 A memory save operation to 8-byte variable in RV32 is divided into two sw instructions in the put_user macro. The current fixup returns execution flow to the second sw instead of the one after it. This patch fixes this fixup code according to the load access part. Signed-off-by: Alan Kao Cc: Greentime Hu Cc: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index a00168b980d2e..fb53a8089e769 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -300,7 +300,7 @@ do { \ " .balign 4\n" \ "4:\n" \ " li %0, %6\n" \ - " jump 2b, %1\n" \ + " jump 3b, %1\n" \ " .previous\n" \ " .section __ex_table,\"a\"\n" \ " .balign " RISCV_SZPTR "\n" \ -- GitLab From 387181dcdb6c1ee254efab4744846a7a53d4c4cb Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 26 Mar 2019 08:03:47 +0000 Subject: [PATCH 0258/1861] RISC-V: Always compile mm/init.c with cmodel=medany and notrace The Linux RISC-V 32bit kernel is broken after we moved setup_vm() from kernel/setup.c to mm/init.c because Linux RISC-V 32bit kernel by default uses cmodel=medlow which results in a non-position-independent setup_vm(). This patch fixes Linux RISC-V 32bit kernel booting by: 1. Forcing cmodel=medany for mm/init.c 2. 
Moving remaining MM-related stuff va_pa_offset, pfn_base and empty_zero_page from kernel/setup.c to mm/init.c. Further, setup_vm() cannot handle GCC instrumentation for FTRACE so we disable it for mm/init.c by not using the "-pg" compiler flag. Fixes: 6f1e9e946f0b ("RISC-V: Move setup_vm() to mm/init.c") Suggested-by: Christoph Hellwig Suggested-by: Mike Rapoport Signed-off-by: Anup Patel Reviewed-by: Mike Rapoport Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 3 --- arch/riscv/kernel/setup.c | 8 -------- arch/riscv/mm/Makefile | 6 ++++++ arch/riscv/mm/init.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f13f7f276639d..598568168d351 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -4,7 +4,6 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_setup.o = -pg endif extra-y += head.o @@ -29,8 +28,6 @@ obj-y += vdso.o obj-y += cacheinfo.o obj-y += vdso/ -CFLAGS_setup.o := -mcmodel=medany obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index ecb654f6a79ef..540a331d13769 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -48,14 +48,6 @@ struct screen_info screen_info = { }; #endif -unsigned long va_pa_offset; -EXPORT_SYMBOL(va_pa_offset); -unsigned long pfn_base; -EXPORT_SYMBOL(pfn_base); - -unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; -EXPORT_SYMBOL(empty_zero_page); - /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; unsigned long boot_cpu_hartid; diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index eb22ab49b3e00..b68aac7018031 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -1,3 +1,9 @@ + +CFLAGS_init.o := -mcmodel=medany +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_init.o = -pg +endif + obj-y += init.o obj-y += fault.o obj-y += extable.o diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b379a75ac6a67..5fd8c922e1c22 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -25,6 +25,10 @@ #include #include +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] + __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; @@ -143,6 +147,11 @@ void __init setup_bootmem(void) } } +unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); +unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); + pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); @@ -172,6 +181,25 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) } } +/* + * setup_vm() is called from head.S with MMU-off. + * + * Following requirements should be honoured for setup_vm() to work + * correctly: + * 1) It should use PC-relative addressing for accessing kernel symbols. + * To achieve this we always use GCC cmodel=medany. + * 2) The compiler instrumentation for FTRACE will not work for setup_vm() + * so disable compiler instrumentation when FTRACE is enabled. + * + * Currently, the above requirements are honoured by using custom CFLAGS + * for init.o in mm/Makefile. 
+ */ + +#ifndef __riscv_cmodel_medany +#error "setup_vm() is called from head.S before relocate so it should " + "not use absolute addressing." +#endif + asmlinkage void __init setup_vm(void) { extern char _start; -- GitLab From 14bc29646639650e38f061fca6f644706cc25034 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 7 Jan 2019 14:09:34 +0100 Subject: [PATCH 0259/1861] drm/omap: fix typo Fix spelling mistake: "lenght" -> "length" Signed-off-by: Matteo Croce Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190107130934.9997-1-mcroce@redhat.com --- drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 813ba42f27539..e384b95ad8573 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -708,7 +708,7 @@ int hdmi4_audio_config(struct hdmi_core_data *core, struct hdmi_wp_data *wp, else acore.i2s_cfg.justification = HDMI_AUDIO_JUSTIFY_RIGHT; /* - * The I2S input word length is twice the lenght given in the IEC-60958 + * The I2S input word length is twice the length given in the IEC-60958 * status word. If the word size is greater than * 20 bits, increment by one. */ -- GitLab From 36a1da15b5df493241b0011d2185fdd724ac1ed1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 26 Mar 2019 08:14:37 -0700 Subject: [PATCH 0260/1861] drm/omap: hdmi4_cec: Fix CEC clock handling for PM If CONFIG_OMAP4_DSS_HDMI_CEC is enabled in .config, deeper SoC idle states are blocked because the CEC clock gets always enabled on init. Let's fix the issue by moving the CEC clock handling to happen later in hdmi_cec_adap_enable() as suggested by Hans Verkuil . This way the CEC clock gets only enabled when needed. This can be tested by doing cec-ctl --playback to enable the CEC, and doing cec-ctl --clear to disable it. Let's also fix the typo for "divider" in the comments while at it. 
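As a quick sanity check on the divider value that hdmi_cec_adap_enable() now programs only while CEC is in use, here is the arithmetic from the driver comment in standalone form (an illustration, not driver code):

	#include <stdio.h>

	int main(void)
	{
		const unsigned long fclk_hz = 48000000;	/* HDMI functional clock: 48 MHz */
		const unsigned long cec_hz = 2000000;	/* CEC needs a 2 MHz clock */
		unsigned long div = fclk_hz / cec_hz;	/* 48 / 2 = 24 == 0x18 */

		/* 0x18 is the value written to the HDMI_WP_CLK divider field [5:0] */
		printf("CEC clock divider = %lu (0x%lx)\n", div, div);
		return 0;
	}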
Fixes: 8d7f934df8d8 ("omapdrm: hdmi4_cec: add OMAP4 HDMI CEC support") Suggested-by: Hans Verkuil Cc: Hans Verkuil Cc: Jyri Sarha Cc: Laurent Pinchart Signed-off-by: Tony Lindgren Reviewed-by: Hans Verkuil Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190326151438.32414-1-tony@atomide.com --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 26 ++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index 340383150fb98..ebf9c96d43eee 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -175,6 +175,7 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) REG_FLD_MOD(core->base, HDMI_CORE_SYS_INTR_UNMASK4, 0, 3, 3); hdmi_wp_clear_irqenable(core->wp, HDMI_IRQ_CORE); hdmi_wp_set_irqstatus(core->wp, HDMI_IRQ_CORE); + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); hdmi4_core_disable(core); return 0; } @@ -182,16 +183,24 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) if (err) return err; + /* + * Initialize CEC clock divider: CEC needs 2MHz clock hence + * set the divider to 24 to get 48/24=2MHz clock + */ + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0); + /* Clear TX FIFO */ if (!hdmi_cec_clear_tx_fifo(adap)) { pr_err("cec-%s: could not clear TX FIFO\n", adap->name); - return -EIO; + err = -EIO; + goto err_disable_clk; } /* Clear RX FIFO */ if (!hdmi_cec_clear_rx_fifo(adap)) { pr_err("cec-%s: could not clear RX FIFO\n", adap->name); - return -EIO; + err = -EIO; + goto err_disable_clk; } /* Clear CEC interrupts */ @@ -236,6 +245,12 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, temp); } return 0; + +err_disable_clk: + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); + hdmi4_core_disable(core); + + return err; } static int hdmi_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr) @@ -333,11 +348,8 @@ int hdmi4_cec_init(struct platform_device *pdev, struct hdmi_core_data *core, return ret; core->wp = wp; - /* - * Initialize CEC clock divider: CEC needs 2MHz clock hence - * set the devider to 24 to get 48/24=2MHz clock - */ - REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0x18, 5, 0); + /* Disable clock initially, hdmi_cec_adap_enable() manages it */ + REG_FLD_MOD(core->wp->base, HDMI_WP_CLK, 0, 5, 0); ret = cec_register_adapter(core->adap, &pdev->dev); if (ret < 0) { -- GitLab From f19501aa07f18268ab14f458b51c1c6b7f72a134 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 12 Mar 2019 10:09:38 -0700 Subject: [PATCH 0261/1861] x86/mce: Fix machine_check_poll() tests for error types There has been a lurking "TBD" in the machine check poll routine ever since it was first split out from the machine check handler. The potential issue is that the poll routine may have just begun a read from the STATUS register in a machine check bank when the hardware logs an error in that bank and signals a machine check. That race used to be pretty small back when machine checks were broadcast, but the addition of local machine check means that the poll code could continue running and clear the error from the bank before the local machine check handler on another CPU gets around to reading it. Fix the code to be sure to only process errors that need to be processed in the poll code, leaving other logged errors alone for the machine check handler to find and process. 
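A hypothetical, self-contained model of the resulting per-bank decision may help review. It mirrors the diff below but is not the kernel function itself; only the architectural MCi_STATUS bit positions are assumed:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define S_VAL (1ULL << 63)	/* MCI_STATUS_VAL */
	#define S_UC  (1ULL << 61)	/* MCI_STATUS_UC  */
	#define S_EN  (1ULL << 60)	/* MCI_STATUS_EN  */
	#define S_PCC (1ULL << 57)	/* MCI_STATUS_PCC */
	#define S_S   (1ULL << 56)	/* MCI_STATUS_S   */

	/* should the poller log this bank? */
	static bool poll_logs(uint64_t status, bool mcp_uc, bool ser)
	{
		if (!(status & S_VAL))
			return false;	/* nothing logged in this bank */
		if (mcp_uc || !(status & S_UC))
			return true;	/* CPU-online scan, or a corrected error */
		if (!ser)
			return false;	/* pre-SER CPU: leave uncorrected errors alone */
		if (!(status & S_EN))
			return true;	/* "not enabled" (speculative) errors */
		if (!(status & S_PCC) && !(status & S_S))
			return true;	/* UCNA: UC == 1 && PCC == 0 && S == 0 */
		return false;		/* presumed race with #MC: leave it for the handler */
	}

	int main(void)
	{
		/* a UCNA is now logged by the poller on an SER-capable CPU... */
		assert(poll_logs(S_VAL | S_UC | S_EN, false, true));
		/* ...while a signaled error is left for do_machine_check() */
		assert(!poll_logs(S_VAL | S_UC | S_EN | S_S, false, true));
		return 0;
	}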
[ bp: Massage a bit and flip the "== 0" check to the usual !(..) test. ] Fixes: b79109c3bbcf ("x86, mce: separate correct machine check poller and fatal exception handler") Fixes: ed7290d0ee8f ("x86, mce: implement new status bits") Reported-by: Ashok Raj Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Ashok Raj Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Thomas Gleixner Cc: x86-ml Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20190312170938.GA23035@agluck-desk --- arch/x86/kernel/cpu/mce/core.c | 44 ++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b7fb541a4873f..e558ca77cfe80 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -712,19 +712,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); + + /* If this entry is not valid, ignore it */ if (!(m.status & MCI_STATUS_VAL)) continue; /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? + * If we are logging everything (at CPU online) or this + * is a corrected error, then we must log it. */ - if (!(flags & MCP_UC) && - (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; + if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC)) + goto log_it; + + /* + * Newer Intel systems that support software error + * recovery need to make additional checks. Other + * CPUs should skip over uncorrected errors, but log + * everything else. + */ + if (!mca_cfg.ser) { + if (m.status & MCI_STATUS_UC) + continue; + goto log_it; + } + + /* Log "not enabled" (speculative) errors */ + if (!(m.status & MCI_STATUS_EN)) + goto log_it; + + /* + * Log UCNA (SDM: 15.6.3 "UCR Error Classification") + * UC == 1 && PCC == 0 && S == 0 + */ + if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S)) + goto log_it; + + /* + * Skip anything else. Presumption is that our read of this + * bank is racing with a machine check. Leave the log alone + * for do_machine_check() to deal with it. + */ + continue; +log_it: error_seen = true; mce_read_aux(&m, i); -- GitLab From 006c077041dc73b9490fffc4c6af5befe0687110 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jul 2018 16:40:09 -0500 Subject: [PATCH 0262/1861] x86/mce: Handle varying MCA bank counts Linux reads MCG_CAP[Count] to find the number of MCA banks visible to a CPU. Currently, this number is the same for all CPUs and a warning is shown if there is a difference. The number of banks is overwritten with the MCG_CAP[Count] value of each following CPU that boots. According to the Intel SDM and AMD APM, the MCG_CAP[Count] value gives the number of banks that are available to a "processor implementation". The AMD BKDGs/PPRs further clarify that this value is per core. This value has historically been the same for every core in the system, but that is not an architectural requirement. Future AMD systems may have different MCG_CAP[Count] values per core, so the assumption that all CPUs will have the same MCG_CAP[Count] value will no longer be valid. Also, the first CPU to boot will allocate the struct mce_banks[] array using the number of banks based on its MCG_CAP[Count] value. The machine check handler and other functions use the global number of banks to iterate and index into the mce_banks[] array. 
So it's possible to use an out-of-bounds index on an asymmetric system where a following CPU sees a MCG_CAP[Count] value greater than its predecessors. Thus, allocate the mce_banks[] array to the maximum number of banks. This will avoid the potential out-of-bounds index since the value of mca_cfg.banks is capped to MAX_NR_BANKS. Set the value of mca_cfg.banks equal to the max of the previous value and the value for the current CPU. This way mca_cfg.banks will always represent the max number of banks detected on any CPU in the system. This will ensure that all CPUs will access all the banks that are visible to them. A CPU that can access fewer than the max number of banks will find the registers of the extra banks to be read-as-zero. Furthermore, print the resulting number of MCA banks in use. Do this in mcheck_late_init() so that the final value is printed after all CPUs have been initialized. Finally, get bank count from target CPU when doing injection with mce-inject module. [ bp: Remove out-of-bounds example, passify and cleanup commit message. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-edac Cc: Pu Wen Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: x86-ml Link: https://lkml.kernel.org/r/20180727214009.78289-1-Yazen.Ghannam@amd.com --- arch/x86/kernel/cpu/mce/core.c | 22 +++++++--------------- arch/x86/kernel/cpu/mce/inject.c | 14 +++++++------- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e558ca77cfe80..c3498732ba287 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1481,13 +1481,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1501,28 +1500,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -2481,6 +2471,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 8492ef7d90150..3f82afd0f46f2 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -570,9 +568,15 @@ err: static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can 
handle non-uniform values. */ rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -665,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) -- GitLab From 2bafa1e9625400bec4c840a168d70ba52607a58d Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Tue, 26 Mar 2019 09:55:54 -0700 Subject: [PATCH 0263/1861] HID: quirks: Fix keyboard + touchpad on Lenovo Miix 630 Similar to commit edfc3722cfef ("HID: quirks: Fix keyboard + touchpad on Toshiba Click Mini not working"), the Lenovo Miix 630 has a combo keyboard/touchpad device with vid:pid of 04F3:0400, which is shared with Elan touchpads. The combo on the Miix 630 has an ACPI id of QTEC0001, which is not claimed by the elan_i2c driver, so key on that similar to what was done for the Toshiba Click Mini. Signed-off-by: Jeffrey Hugo Signed-off-by: Jiri Kosina --- drivers/hid/hid-quirks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 1148d8c0816a3..77ffba48cc737 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -715,7 +715,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DEALEXTREAME, USB_DEVICE_ID_DEALEXTREAME_RADIO_SI4701) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) }, - { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) }, { HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) }, { HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) }, @@ -996,6 +995,10 @@ bool hid_ignore(struct hid_device *hdev) if (hdev->product == 0x0401 && strncmp(hdev->name, "ELAN0800", 8) != 0) return true; + /* Same with product id 0x0400 */ + if (hdev->product == 0x0400 && + strncmp(hdev->name, "QTEC0001", 8) != 0) + return true; break; } -- GitLab From 9ec71c1cdbdd6c4ac0150a51d64e06c5d1bd207e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 26 Mar 2019 22:00:06 -0700 Subject: [PATCH 0264/1861] libbpf: fix btf_dedup equivalence check handling of different kinds btf_dedup_is_equiv() used to compare btf_type->info fields, before doing a kind-specific equivalence check. This comparison implicitly verified that candidate and canonical types are of the same kind. With enum fwd resolution logic this check couldn't be done generically anymore, as for enums info contains vlen, which differs between an enum fwd and a fully-defined enum, so this check was subsumed by kind-specific equivalence checks. This change caused btf_dedup_is_equiv() to let a VOID vs. other-type comparison fall through to the switch, which was never meant to handle the VOID kind, as VOID is always pre-resolved to itself and is only equivalent to itself, which is checked early in btf_dedup_is_equiv(). This change adds back a BTF kind equality check in place of the more generic btf_type->info check, still deferring further kind-specific checks to a per-kind switch. 
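For readers unfamiliar with the BTF encoding, a small standalone illustration of why raw info equality used to subsume the kind check; the macros follow uapi linux/btf.h (kind in bits 24-28, vlen in the low 16 bits; the kind_flag bit is omitted here) and the assertions are illustrative only:

	#include <assert.h>
	#include <stdint.h>

	#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
	#define BTF_INFO_VLEN(info)	((info) & 0xffff)
	#define BTF_KIND_ENUM	6
	#define BTF_KIND_FWD	7

	static uint32_t btf_info(uint32_t kind, uint32_t vlen)
	{
		return (kind << 24) | vlen;
	}

	int main(void)
	{
		uint32_t enum3 = btf_info(BTF_KIND_ENUM, 3);	/* enum with 3 values */
		uint32_t fwd = btf_info(BTF_KIND_FWD, 0);	/* forward declaration */

		/* the old info-equality test told kinds and vlens apart in one shot... */
		assert(enum3 != fwd);
		/* ...so once fwd resolution made vlen mismatches acceptable, an
		 * explicit kind comparison had to be restored: */
		assert(BTF_INFO_KIND(enum3) != BTF_INFO_KIND(fwd));
		return 0;
	}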
Fixes: 9768095ba97c ("btf: resolve enum fwds in btf_dedup") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 87e3020ac1bc8..cf119c9b6f270 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2107,6 +2107,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return fwd_kind == real_kind; } + if (cand_kind != canon_kind) + return 0; + switch (cand_kind) { case BTF_KIND_INT: return btf_equal_int(cand_type, canon_type); -- GitLab From eb76899ce749507e09cad6816f32cede14a9b7ee Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 26 Mar 2019 22:00:07 -0700 Subject: [PATCH 0265/1861] selftests/bpf: add btf_dedup test for VOID equivalence check This patch adds specific test exposing bug in btf_dedup_is_equiv() when comparing candidate VOID type to a non-VOID canonical type. It's important for canonical type to be anonymous, otherwise name equality check will do the right thing and will exit early. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 47 ++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 23e3b314ca603..ec5794e4205bc 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5776,6 +5776,53 @@ const struct btf_dedup_test dedup_tests[] = { .dedup_table_size = 1, /* force hash collisions */ }, }, +{ + .descr = "dedup: void equiv check", + /* + * // CU 1: + * struct s { + * struct {} *x; + * }; + * // CU 2: + * struct s { + * int *x; + * }; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .expect = { + .raw_types = { + /* CU 1 */ + BTF_STRUCT_ENC(0, 0, 1), /* [1] struct {} */ + BTF_PTR_ENC(1), /* [2] ptr -> [1] */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [3] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 0), + /* CU 2 */ + BTF_PTR_ENC(0), /* [4] ptr -> void */ + BTF_STRUCT_ENC(NAME_NTH(1), 1, 8), /* [5] struct s */ + BTF_MEMBER_ENC(NAME_NTH(2), 4, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0s\0x"), + }, + .opts = { + .dont_resolve_fwds = false, + .dedup_table_size = 1, /* force hash collisions */ + }, +}, { .descr = "dedup: all possible kinds (no duplicates)", .input = { -- GitLab From 025c65e119bf58b610549ca359c9ecc5dee6a8d2 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 26 Mar 2019 13:20:43 +0100 Subject: [PATCH 0266/1861] xfrm: Honor original L3 slave device in xfrmi policy lookup If an xfrmi is associated to a vrf layer 3 master device, xfrm_policy_check() fails after traffic decapsulation. The input interface is replaced by the layer 3 master device, and hence xfrmi_decode_session() can't match the xfrmi anymore to satisfy policy checking. Extend ingress xfrmi lookup to honor the original layer 3 slave device, allowing xfrm interfaces to operate within a vrf domain. 
Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Martin Willi Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- net/xfrm/xfrm_interface.c | 17 ++++++++++++++--- net/xfrm/xfrm_policy.c | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 902437dfbce77..c9b0b2b5d672f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -295,7 +295,8 @@ struct xfrm_replay { }; struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb); + struct xfrm_if *(*decode_session)(struct sk_buff *skb, + unsigned short family); }; void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dbb3c1945b5c9..85fec98676d34 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) return NULL; } -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) +static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, + unsigned short family) { struct xfrmi_net *xfrmn; - int ifindex; struct xfrm_if *xi; + int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) return NULL; + switch (family) { + case AF_INET6: + ifindex = inet6_sdif(skb); + break; + case AF_INET: + ifindex = inet_sdif(skb); + break; + } + if (!ifindex) + ifindex = skb->dev->ifindex; + xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); - ifindex = skb->dev->ifindex; for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { if (ifindex == xi->dev->ifindex && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8d1a898d0ba56..a6b58df7a70f6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3313,7 +3313,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, ifcb = xfrm_if_get_cb(); if (ifcb) { - xi = ifcb->decode_session(skb); + xi = ifcb->decode_session(skb, family); if (xi) { if_id = xi->p.if_id; net = xi->net; -- GitLab From 486fa92df4707b5df58d6508728bdb9321a59766 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 25 Mar 2019 16:55:27 -0500 Subject: [PATCH 0267/1861] libnvdimm/btt: Fix a kmemdup failure check In case kmemdup fails, the fix releases resources and returns to avoid the NULL pointer dereference. 
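The fix follows the familiar goto-unwind shape for constructors. A hypothetical userspace analogue of that shape (names invented for illustration; strdup() stands in for kmemdup() and alloc_id() for ida_simple_get()):

	#include <stdlib.h>
	#include <string.h>

	struct obj { int id; char *name; };

	/* stand-in for ida_simple_get(): a new id, or -1 on exhaustion */
	static int alloc_id(void)
	{
		static int next;
		return next < 1024 ? next++ : -1;
	}

	static struct obj *obj_create(const char *name)
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (!o)
			return NULL;
		o->id = alloc_id();
		if (o->id < 0)
			goto out_obj;
		if (name) {
			o->name = strdup(name);
			if (!o->name)
				goto out_id;	/* the branch the driver was missing */
		}
		return o;

	out_id:
		/* the driver calls ida_simple_remove() here; alloc_id() needs no undo */
	out_obj:
		free(o);
		return NULL;
	}

	int main(void)
	{
		struct obj *o = obj_create("btt0");

		if (o) {
			free(o->name);
			free(o);
		}
		return 0;
	}

Every acquisition gets a matching cleanup label, so a late failure releases everything taken before it and nothing is leaked.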
Signed-off-by: Aditya Pakki Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index b72a303176c70..9486acc08402d 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -198,14 +198,15 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); - if (nd_btt->id < 0) { - kfree(nd_btt); - return NULL; - } + if (nd_btt->id < 0) + goto out_nd_btt; nd_btt->lbasize = lbasize; - if (uuid) + if (uuid) { uuid = kmemdup(uuid, 16, GFP_KERNEL); + if (!uuid) + goto out_put_id; + } nd_btt->uuid = uuid; dev = &nd_btt->dev; dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); @@ -220,6 +221,13 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, return NULL; } return dev; + +out_put_id: + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + +out_nd_btt: + kfree(nd_btt); + return NULL; } struct device *nd_btt_create(struct nd_region *nd_region) -- GitLab From 662d66466637862ef955f7f6e78a286d8cf0ebef Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:19 -0700 Subject: [PATCH 0268/1861] IB/hfi1: Failed to drain send queue when QP is put into error state When a QP is put into error state, all pending requests in the send work queue should be drained. The following sequence of events could lead to a failure, causing a request to hang: (1) The QP builds a packet and tries to send through SDMA engine. However, PIO engine is still busy. Consequently, this packet is put on the QP's tx list and the QP is put on the PIO waiting list. The field qp->s_flags is set with HFI1_S_WAIT_PIO_DRAIN; (2) The QP is put into error state by the user application and notify_error_qp() is called, which removes the QP from the PIO waiting list and the packet from the QP's tx list. In addition, qp->s_flags is cleared of RVT_S_ANY_WAIT_IO bits, which does not include the HFI1_S_WAIT_PIO_DRAIN bit; (3) The hfi1_schedule_send() function is called to drain the QP's send queue. Subsequently, hfi1_do_send() is called. Since the flag bit HFI1_S_WAIT_PIO_DRAIN is set in qp->s_flags, hfi1_send_ok() fails. As a result, hfi1_do_send() bails out without draining any request from the send queue; (4) The PIO engine completes the sending and tries to wake up any QP on its waiting list. But the QP has been removed from the PIO waiting list and therefore is left sleeping forever. The fix is to clear qp->s_flags of HFI1_S_ANY_WAIT_IO bits in step (2). HFI1_S_ANY_WAIT_IO includes RVT_S_ANY_WAIT_IO and HFI1_S_WAIT_PIO_DRAIN.
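The superset relation between the two masks is the crux of the fix. A sketch of the assumed flag composition (the bit value is illustrative; the real definitions live in the hfi1 headers):

	/* Clearing HFI1_S_ANY_WAIT_IO in step (2) now also clears the
	 * driver-private PIO drain wait bit, so hfi1_send_ok() can succeed
	 * and the send queue can be drained. */
	#define HFI1_S_WAIT_PIO_DRAIN	0x1000000	/* illustrative value */
	#define HFI1_S_ANY_WAIT_IO	(RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)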
Fixes: 2e2ba09e48b7 ("IB/rdmavt, IB/hfi1: Create device dependent s_flags") Cc: # 4.19.x+ Reviewed-by: Mike Marciniszyn Reviewed-by: Alex Estrin Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9b643c2409cf8..64889eda17354 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -898,7 +898,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) { - qp->s_flags &= ~RVT_S_ANY_WAIT_IO; + qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); -- GitLab From 93b289b9aff66eca7575b09f36f5abbeca8e6167 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:29 -0700 Subject: [PATCH 0269/1861] IB/hfi1: Clear the IOWAIT pending bits when QP is put into error state When a QP is put into error state, it may be waiting for send engine resources. In this case, the QP will be removed from the send engine's waiting list, but its IOWAIT pending bits are not cleared. This will normally not have any major impact as the QP is being destroyed. However, the QP still needs to wind down its operations, such as draining the send queue by scheduling the send engine. Clearing the pending bits will avoid any potential complications. In addition, if the QP will eventually hang, clearing the pending bits can help debugging by presenting a consistent picture if the user dumps the qp_stats. This patch clears a QP's IOWAIT_PENDING_IB and IO_PENDING_TID bits in priv->s_iowait.flags in this case. Fixes: 5da0fc9dbf89 ("IB/hfi1: Prepare resource waits for dual leg") Reviewed-by: Mike Marciniszyn Reviewed-by: Alex Estrin Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 64889eda17354..eba300330a027 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -899,6 +899,8 @@ void notify_error_qp(struct rvt_qp *qp) !(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~HFI1_S_ANY_WAIT_IO; + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB); + iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID); list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; rvt_put_qp(qp); -- GitLab From a8639a79e85c18c16c10089edd589c7948f19bbd Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:39 -0700 Subject: [PATCH 0270/1861] IB/hfi1: Eliminate opcode tests on mr deref When an old ack_queue entry is used to store an incoming request, it may need to clean up the old entry if it is still referencing the MR. Originally only RDMA READ request needed to reference MR on the responder side and therefore the opcode was tested when cleaning up the old entry. The introduction of tid rdma specific operations in the ack_queue makes the specific opcode tests wrong. Multiple opcodes (RDMA READ, TID RDMA READ, and TID RDMA WRITE) may need MR ref cleanup. Remove the opcode specific tests associated with the ack_queue. 
Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Signed-off-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e6726c1ab8669..5991211d72bdd 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -3088,7 +3088,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } @@ -3166,7 +3166,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + if (e->rdma_sge.mr) { rvt_put_mr(e->rdma_sge.mr); e->rdma_sge.mr = NULL; } -- GitLab From d0294344470e6b52d097aa7369173f32d11f2f52 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:55:49 -0700 Subject: [PATCH 0271/1861] IB/hfi1: Fix the allocation of RSM table The receive side mapping (RSM) on hfi1 hardware is a special matching mechanism to direct an incoming packet to a given hardware receive context. It has 4 instances of matching capabilities (RSM0 - RSM3) that share the same RSM table (RMT). The RMT has a total of 256 entries, each of which points to a receive context. Currently, three instances of RSM have been used: 1. RSM0 by QOS; 2. RSM1 by PSM FECN; 3. RSM2 by VNIC. Each RSM instance should reserve enough entries in RMT to function properly. Since both PSM and VNIC could allocate any receive context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts, PSM FECN must reserve enough RMT entries to cover the entire receive context index range (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) instead of only the user receive contexts allocated for PSM (dd->num_user_contexts). Consequently, the sizing of dd->num_user_contexts in set_up_context_variables is incorrect. Fixes: 2280740f01ae ("IB/hfi1: Virtual Network Interface Controller (VNIC) HW support") Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 612f04190ed83..9784c6c0d2ecf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13232,7 +13232,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int total_contexts; int ret; unsigned ngroups; - int qos_rmt_count; + int rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; u32 send_contexts = chip_send_contexts(dd); @@ -13294,10 +13294,20 @@ static int set_up_context_variables(struct hfi1_devdata *dd) n_usr_ctxts = rcv_contexts - total_contexts; } - /* each user context requires an entry in the RMT */ - qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); - if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { - user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; + /* + * The RMT entries are currently allocated as shown below: + * 1. QOS (0 to 128 entries); + * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 3. VNIC (num_vnic_contexts). 
+ * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * entries of RMT because both VNIC and PSM could allocate any receive + * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, + * and PSM FECN must reserve an RMT entry for each possible PSM receive + * context. + */ + rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { + user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, "RMT size is reducing the number of user receive contexts from %u to %d\n", n_usr_ctxts, @@ -14285,9 +14295,11 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, u64 reg; int i, idx, regoff, regidx; u8 offset; + u32 total_cnt; /* there needs to be enough room in the map table */ - if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) { + total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; + if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); return; } @@ -14341,7 +14353,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* add rule 1 */ add_rsm_rule(dd, RSM_INS_FECN, &rrd); - rmt->used += dd->num_user_contexts; + rmt->used += total_cnt; } /* Initialize RSM for VNIC */ -- GitLab From 1abe186ed8a6593069bc122da55fc684383fdc1c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 19 Mar 2019 11:24:36 +0200 Subject: [PATCH 0272/1861] IB/mlx5: Reset access mask when looping inside page fault handler If page-fault handler spans multiple MRs then the access mask needs to be reset before each MR handling or otherwise write access will be granted to mapped pages instead of read-only. Cc: # 3.19 Fixes: 7bdf65d411c1 ("IB/mlx5: Handle page faults") Reported-by: Jerome Glisse Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index c20bfc41ecf18..0aa10ebda5d9a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -585,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; - u64 access_mask = ODP_READ_ALLOWED_BIT; + u64 access_mask; u64 start_idx, page_mask; struct ib_umem_odp *odp; size_t size; @@ -607,6 +607,7 @@ next_mr: page_shift = mr->umem->page_shift; page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + access_mask = ODP_READ_ALLOWED_BIT; if (prefetch && !downgrade && !mr->umem->writable) { /* prefetch with write-access must -- GitLab From 1755ecedc485e8a898ac27b90be664784ddb6b57 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 14:50:29 -0800 Subject: [PATCH 0273/1861] doc/kprobes: Update obsolete RCU update functions The RCU flavors have been consolidated, so this commit replaces mentions of the now-obsolete synchronize_sched() function with synchronize_rcu(). Signed-off-by: Paul E. McKenney Cc: "Naveen N. Rao" Cc: Anil S Keshavamurthy Cc: "David S. 
Miller" Cc: Jonathan Corbet Cc: Acked-by: Masami Hiramatsu --- Documentation/kprobes.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 10f4499e677c0..ee60e519438aa 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -243,10 +243,10 @@ Optimization ^^^^^^^^^^^^ The Kprobe-optimizer doesn't insert the jump instruction immediately; -rather, it calls synchronize_sched() for safety first, because it's +rather, it calls synchronize_rcu() for safety first, because it's possible for a CPU to be interrupted in the middle of executing the -optimized region [3]_. As you know, synchronize_sched() can ensure -that all interruptions that were active when synchronize_sched() +optimized region [3]_. As you know, synchronize_rcu() can ensure +that all interruptions that were active when synchronize_rcu() was called are done, but only if CONFIG_PREEMPT=n. So, this version of kprobe optimization supports only kernels with CONFIG_PREEMPT=n [4]_. -- GitLab From ad51c46eec739c18be24178a30b47801b10e0357 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Thu, 21 Mar 2019 13:26:28 +0800 Subject: [PATCH 0274/1861] drm/amd/amdgpu: fix PCIe dpm feature issue (v3) use pcie_bandwidth_available to get real link state to update pcie table. v2: fix incorrect initialized return value v3: expand the fetching method about the link width to all asics. Signed-off-by: Chengming Gui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4f8fb4ecde341..ac0d646a7b743 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3625,6 +3625,7 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, struct pci_dev *pdev = adev->pdev; enum pci_bus_speed cur_speed; enum pcie_link_width cur_width; + u32 ret = 1; *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; @@ -3632,6 +3633,10 @@ static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, while (pdev) { cur_speed = pcie_get_speed_cap(pdev); cur_width = pcie_get_width_cap(pdev); + ret = pcie_bandwidth_available(adev->pdev, NULL, + NULL, &cur_width); + if (!ret) + cur_width = PCIE_LNK_WIDTH_RESRV; if (cur_speed != PCI_SPEED_UNKNOWN) { if (*speed == PCI_SPEED_UNKNOWN) -- GitLab From 6f5d29ff1a643d52128efb4b6c4f4d4074e32e10 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sat, 23 Mar 2019 01:02:44 +0800 Subject: [PATCH 0275/1861] drm/amd/powerplay: add ECC feature bit It's OK to have this feature bit with old SMU firmwares. But the feature should be disabled on them. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 +++++++++- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9aa7bec1b5fe6..5e3ffcc5b14a0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -91,6 +91,12 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr) * MP0CLK DS */ data->registry_data.disallowed_features = 0xE0041C00; + /* ECC feature should be disabled on old SMUs */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion); + hwmgr->smu_version = smum_get_argument(hwmgr); + if (hwmgr->smu_version < 0x282100) + data->registry_data.disallowed_features |= FEATURE_ECC_MASK; + data->registry_data.od_state_in_dc_support = 0; data->registry_data.thermal_support = 1; data->registry_data.skip_baco_hardware = 0; @@ -357,6 +363,7 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->smu_features[GNLD_DS_MP1CLK].smu_feature_id = FEATURE_DS_MP1CLK_BIT; data->smu_features[GNLD_DS_MP0CLK].smu_feature_id = FEATURE_DS_MP0CLK_BIT; data->smu_features[GNLD_XGMI].smu_feature_id = FEATURE_XGMI_BIT; + data->smu_features[GNLD_ECC].smu_feature_id = FEATURE_ECC_BIT; for (i = 0; i < GNLD_FEATURES_MAX; i++) { data->smu_features[i].smu_feature_bitmap = @@ -3020,7 +3027,8 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) "FCLK_DS", "MP1CLK_DS", "MP0CLK_DS", - "XGMI"}; + "XGMI", + "ECC"}; static const char *output_title[] = { "FEATURES", "BITMASK", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h index a5bc758ae0972..ac2a3118a0ae7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h @@ -80,6 +80,7 @@ enum { GNLD_DS_MP1CLK, GNLD_DS_MP0CLK, GNLD_XGMI, + GNLD_ECC, GNLD_FEATURES_MAX }; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index 63d5cf6915496..b90089a4fb6a4 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -99,7 +99,7 @@ #define FEATURE_DS_MP1CLK_BIT 30 #define FEATURE_DS_MP0CLK_BIT 31 #define FEATURE_XGMI_BIT 32 -#define FEATURE_SPARE_33_BIT 33 +#define FEATURE_ECC_BIT 33 #define FEATURE_SPARE_34_BIT 34 #define FEATURE_SPARE_35_BIT 35 #define FEATURE_SPARE_36_BIT 36 @@ -166,6 +166,7 @@ #define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT ) #define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT ) #define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT ) +#define FEATURE_ECC_MASK (1ULL << FEATURE_ECC_BIT ) #define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x00000001 #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000002 -- GitLab From aaaba51bf1618958c91728607c63264655c545ef Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sat, 23 Mar 2019 02:02:24 +0800 Subject: [PATCH 0276/1861] drm/amd/powerplay: correct data type to avoid overflow Avoid left shift overflow. 
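For context: FEATURE_XGMI_BIT is 32 and FEATURE_ECC_BIT is 33, while the literal 1 is a 32-bit int, so (1 << 32) shifts past the width of the operand, which is undefined behaviour in C and typically yields the wrong mask. Promoting the constant to 64 bits first is the fix; a standalone illustration (not the driver's header):

	#include <linux/types.h>

	/* Assumes a 32-bit int, as on the kernel's supported targets. */
	#define BAD_MASK(bit)	(1 << (bit))	/* undefined for bit >= 32 */
	#define GOOD_MASK(bit)	(1ULL << (bit))	/* well-defined up to bit 63 */

	static u64 xgmi_mask = GOOD_MASK(32);	/* 0x100000000, as intended */
	static u64 ecc_mask  = GOOD_MASK(33);	/* 0x200000000 */

The kernel's BIT_ULL() macro expresses the same promotion and could be used instead.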
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h index b90089a4fb6a4..195c4ae670585 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h @@ -165,7 +165,7 @@ #define FEATURE_DS_FCLK_MASK (1 << FEATURE_DS_FCLK_BIT ) #define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT ) #define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT ) -#define FEATURE_XGMI_MASK (1 << FEATURE_XGMI_BIT ) +#define FEATURE_XGMI_MASK (1ULL << FEATURE_XGMI_BIT ) #define FEATURE_ECC_MASK (1ULL << FEATURE_ECC_BIT ) #define DPM_OVERRIDE_DISABLE_SOCCLK_PID 0x00000001 -- GitLab From db64a2f43c1bc22c5ff2d22606000b8c3587d0ec Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 26 Mar 2019 17:57:53 +0800 Subject: [PATCH 0277/1861] drm/amd/powerplay: fix possible hang with 3+ 4K monitors If DAL requires MCLK to be forced high, FCLK will also be forced high. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 5e3ffcc5b14a0..23b5b94a4939a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -3470,6 +3470,7 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr) struct vega20_single_dpm_table *dpm_table; bool vblank_too_short = false; bool disable_mclk_switching; + bool disable_fclk_switching; uint32_t i, latency; disable_mclk_switching = ((1 < hwmgr->display_config->num_display) && @@ -3545,13 +3546,20 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr) if (hwmgr->display_config->nb_pstate_switch_disable) dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; + if ((disable_mclk_switching && + (dpm_table->dpm_state.hard_min_level == dpm_table->dpm_levels[dpm_table->count - 1].value)) || + hwmgr->display_config->min_mem_set_clock / 100 >= dpm_table->dpm_levels[dpm_table->count - 1].value) + disable_fclk_switching = true; + else + disable_fclk_switching = false; + /* fclk */ dpm_table = &(data->dpm_table.fclk_table); dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[0].value; dpm_table->dpm_state.soft_max_level = VG20_CLOCK_MAX_DEFAULT; dpm_table->dpm_state.hard_min_level = dpm_table->dpm_levels[0].value; dpm_table->dpm_state.hard_max_level = VG20_CLOCK_MAX_DEFAULT; - if (hwmgr->display_config->nb_pstate_switch_disable) + if (hwmgr->display_config->nb_pstate_switch_disable || disable_fclk_switching) dpm_table->dpm_state.soft_min_level = dpm_table->dpm_levels[dpm_table->count - 1].value; /* vclk */ -- GitLab From ab0cb022c8fd6f465f40f1cbee7d71c6280b6c74 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Mon, 18 Mar 2019 18:04:05 +0800 Subject: [PATCH 0278/1861] drm/amd/display: VBIOS can't light up HDMI when restarting the system [Why] VBIOS will not post a pixel rate > 340MHz. If the driver sets a pixel rate > 340MHz and the system is restarted via the restart button, VBIOS can't post the HDMI monitor because the monitor stays in the HDMI 2.0 state. [How] Program Scrambling_Enable and TMDS_Bit_Clock_Ratio when disabling the stream.
Signed-off-by: Paul Hsieh Reviewed-by: Charlene Liu Acked-by: Bhawanpreet Lakha Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 4eba3c4800b63..ea18e9c2d8cea 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2660,12 +2660,18 @@ void core_link_enable_stream( void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; + struct dc_stream_state *stream = pipe_ctx->stream; core_dc->hwss.blank_stream(pipe_ctx); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + dal_ddc_service_write_scdc_data( + stream->link->ddc, 0, + stream->timing.flags.LTE_340MCSC_SCRAMBLE); + core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); -- GitLab From 5ceaeb99ffb4dc002d20f6ac243c19a85e2c7a76 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 23 Mar 2019 19:41:32 +0100 Subject: [PATCH 0279/1861] net: dsa: mv88e6xxx: fix a few issues in mv88e6390x_port_set_cmode This patch fixes a few issues in mv88e6390x_port_set_cmode(). 1. When entering the function the old cmode may be 0, in which case mv88e6390x_serdes_get_lane() returns -ENODEV. As a result we bail out and have no chance to set a new mode. Therefore deal properly with -ENODEV. 2. Once we have disabled power and irq, let's set the cached cmode to 0. This reflects the actual status and is cleaner if we bail out with an error in the following function calls. 3. The cached cmode is used by mv88e6390x_serdes_get_lane(), mv88e6390_serdes_power_lane() and mv88e6390_serdes_irq_enable(). Currently we set the cached cmode to the new one only at the very end of the function, which means that until then we use the old one, which may be wrong. 4. When calling mv88e6390_serdes_irq_enable() we use the lane value belonging to the old cmode. Get the lane belonging to the new cmode before calling this function. It's hard to provide a good "Fixes" tag because quite a few smaller changes have been done to the code in question recently. Fixes: d235c48b40d3 ("net: dsa: mv88e6xxx: power serdes on/off for 10G interfaces on 6390X") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S.
Miller --- drivers/net/dsa/mv88e6xxx/port.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index dce84a2a65c71..c44b2822e4dd0 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -427,18 +427,22 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, return 0; lane = mv88e6390x_serdes_get_lane(chip, port); - if (lane < 0) + if (lane < 0 && lane != -ENODEV) return lane; - if (chip->ports[port].serdes_irq) { - err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (lane >= 0) { + if (chip->ports[port].serdes_irq) { + err = mv88e6390_serdes_irq_disable(chip, port, lane); + if (err) + return err; + } + + err = mv88e6390x_serdes_power(chip, port, false); if (err) return err; } - err = mv88e6390x_serdes_power(chip, port, false); - if (err) - return err; + chip->ports[port].cmode = 0; if (cmode) { err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); @@ -452,6 +456,12 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, if (err) return err; + chip->ports[port].cmode = cmode; + + lane = mv88e6390x_serdes_get_lane(chip, port); + if (lane < 0) + return lane; + err = mv88e6390x_serdes_power(chip, port, true); if (err) return err; @@ -463,8 +473,6 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, } } - chip->ports[port].cmode = cmode; - return 0; } -- GitLab From 0b91bce1ebfc797ff3de60c8f4a1e6219a8a3187 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 25 Mar 2019 14:18:06 +0100 Subject: [PATCH 0280/1861] net: datagram: fix unbounded loop in __skb_try_recv_datagram() Christoph reported a stall while peeking datagram with an offset when busy polling is enabled. __skb_try_recv_datagram() uses as the loop termination condition 'queue empty'. When peeking, the socket queue can be not empty, even when no additional packets are received. Address the issue explicitly checking for receive queue changes, as currently done by __skb_wait_for_more_packets(). Fixes: 2b5cd0dfa384 ("net: Change return type of sk_busy_loop from bool to void") Reported-and-tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index b2651bb6d2a31..e657289db4ac4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -279,7 +279,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(&sk->sk_receive_queue)); + } while (sk->sk_receive_queue.prev != *last); error = -EAGAIN; -- GitLab From 79706ced7a982ebc60c2663a07ff4003847b8be6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 25 Mar 2019 14:35:30 -0700 Subject: [PATCH 0281/1861] MAINTAINERS: Fix documentation file name for PHY Library MAINTAINERS still pointed to phy.txt after moving this file into the rst format, fix this. Reported-by: Joe Perches Fixes: 25fe02d00a1e ("Documentation: net: phy: switch documentation to rst format") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3e5a5d263f299..1fdc8970e8161 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5833,7 +5833,7 @@ L: netdev@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-mdio F: Documentation/devicetree/bindings/net/mdio* -F: Documentation/networking/phy.txt +F: Documentation/networking/phy.rst F: drivers/net/phy/ F: drivers/of/of_mdio.c F: drivers/of/of_net.c -- GitLab From b5f9bd15b88563b55a99ed588416881367a0ce5f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 Mar 2019 13:50:14 +0800 Subject: [PATCH 0282/1861] ila: Fix rhashtable walker list corruption ila_xlat_nl_cmd_flush uses rhashtable walkers allocated from the stack but it never frees them. This corrupts the walker list of the hash table. This patch fixes it. Reported-by: syzbot+dae72a112334aa65a159@syzkaller.appspotmail.com Fixes: b6e71bdebb12 ("ila: Flush netlink command to clear xlat...") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ila/ila_xlat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 79d2e43c05c5e..5fc1f4e0c0cf0 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -417,6 +417,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) done: rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); return ret; } -- GitLab From 669efc76b317b3aa550ffbf0b79d064cb00a5f96 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 26 Mar 2019 14:53:49 +0800 Subject: [PATCH 0283/1861] net: hns3: fix compile error Currently, the rules for configuring search paths in Kbuild have changed; this leads to some errors when compiling hns3 with the following command: make O=DIR M=drivers/net/ethernet/hisilicon/hns3 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c:11:10: fatal error: hnae3.h: No such file or directory This patch fixes it by adding the $(srctree)/ prefix to the search paths. Signed-off-by: Xi Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index fffe8c1c45d39..0fb61d440d3bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -3,7 +3,7 @@ # Makefile for the HISILICON network device drivers.
# -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o \ No newline at end of file -- GitLab From 7f07e5f1f778605e98cf2156d4db1ff3a3a1a74a Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 26 Mar 2019 11:48:57 +0200 Subject: [PATCH 0284/1861] net: mii: Fix PAUSE cap advertisement from linkmode_adv_to_lcl_adv_t() helper With a recent link mode advertisement code update this helper providing local pause capability translation used for flow control link mode negotiation got broken. For eth drivers using this helper, the issue is apparent only if either PAUSE or ASYM_PAUSE is being advertised. Fixes: 3c1bcc8614db ("net: ethernet: Convert phydev advertize and supported from u32 to link mode") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- include/linux/mii.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mii.h b/include/linux/mii.h index 6fee8b1a44008..5cd824c1c0caa 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -469,7 +469,7 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) lcl_adv |= ADVERTISE_PAUSE_ASYM; -- GitLab From b3e208069477588c06f4d5d986164b435bb06e6d Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 26 Mar 2019 11:53:19 -0400 Subject: [PATCH 0285/1861] thunderx: enable page recycling for non-XDP case Commit 773225388dae15e72790 ("net: thunderx: Optimize page recycling for XDP") added code to nicvf_alloc_page() that inadvertently disables receive buffer page recycling for the non-XDP case by always NULL'ng the page pointer. This patch corrects two if-conditionals to allow for the recycling of non-XDP mode pages by only setting the page pointer to NULL when the page is not ready for recycling. Fixes: 773225388dae ("net: thunderx: Optimize page recycling for XDP") Signed-off-by: Dean Nelson Signed-off-by: David S. Miller --- .../ethernet/cavium/thunder/nicvf_queues.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 5b4d3badcb730..55dbf02c42af2 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -105,20 +105,19 @@ static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Check if page can be recycled */ if (page) { ref_count = page_ref_count(page); - /* Check if this page has been used once i.e 'put_page' - * called after packet transmission i.e internal ref_count - * and page's ref_count are equal i.e page can be recycled. + /* This page can be recycled if internal ref_count and page's + * ref_count are equal, indicating that the page has been used + * once for packet transmission. For non-XDP mode, internal + * ref_count is always '1'. */ - if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) - pgcache->ref_count--; - else - page = NULL; - - /* In non-XDP mode, page's ref_count needs to be '1' for it - * to be recycled. 
- */ - if (!rbdr->is_xdp && (ref_count != 1)) + if (rbdr->is_xdp) { + if (ref_count == pgcache->ref_count) + pgcache->ref_count--; + else + page = NULL; + } else if (ref_count != 1) { page = NULL; + } } if (!page) { -- GitLab From cd35ef91490ad8049dd180bb060aff7ee192eda9 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Tue, 26 Mar 2019 11:53:26 -0400 Subject: [PATCH 0286/1861] thunderx: eliminate extra calls to put_page() for pages held for recycling For the non-XDP case, commit 773225388dae15e72790 ("net: thunderx: Optimize page recycling for XDP") added code to nicvf_free_rbdr() that, when releasing the additional receive buffer page reference held for recycling, repeatedly calls put_page() until the page's _refcount goes to zero, which results in the page being freed. This is not okay if the page's _refcount was greater than 1 (in the non-XDP case), because nicvf_free_rbdr() should not be subtracting more than what nicvf_alloc_page() had previously added to the page's _refcount, which was only 1 (in the non-XDP case). This can arise if a received packet is still being processed and the receive buffer (i.e., skb->head) has not yet been freed via skb_free_head() when nicvf_free_rbdr() is spinning through the aforementioned put_page() loop. If this should occur, when the received packet finishes processing and skb_free_head() is called, various problems can ensue. Exactly what happens depends on whether the page has already been reallocated or not: anything from "BUG: Bad page state ... ", to "Unable to handle kernel NULL pointer dereference ..." or "Unable to handle kernel paging request...". So this patch changes nicvf_free_rbdr() to only call put_page() once for pages held for recycling (in the non-XDP case). Fixes: 773225388dae ("net: thunderx: Optimize page recycling for XDP") Signed-off-by: Dean Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 55dbf02c42af2..e246f9733bb89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -364,11 +364,10 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) while (head < rbdr->pgcnt) { pgcache = &rbdr->pgcache[head]; if (pgcache->page && page_ref_count(pgcache->page) != 0) { - if (!rbdr->is_xdp) { - put_page(pgcache->page); - continue; + if (rbdr->is_xdp) { + page_ref_sub(pgcache->page, + pgcache->ref_count - 1); } - page_ref_sub(pgcache->page, pgcache->ref_count - 1); put_page(pgcache->page); } head++; -- GitLab From b4e9e931e9bb2f5b302ce66640832f5a3e57e8c4 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Fri, 22 Mar 2019 14:12:30 +0200 Subject: [PATCH 0287/1861] crypto: caam - fix copy of next buffer for xcbc and cmac MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a side effect of adding xcbc support, where the next_buffer was not being copied. The issue occurs when a blocksize buffer is stored from the previous state and less than a blocksize of data is received from the user. In this case, the nents for req->src is 0, and the next_buffer is not copied.
An example is: { .tap = { 17, 15, 8 }, .psize = 40, .np = 3, .ksize = 16, } Fixes: 12b8567f6fa4 ("crypto: caam - add support for xcbc(aes)") Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index b1eadc6652b5f..7205d9f4029e1 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -865,19 +865,18 @@ static int ahash_update_ctx(struct ahash_request *req) if (ret) goto unmap_ctx; - if (mapped_nents) { + if (mapped_nents) sg_to_sec4_sg_last(req->src, mapped_nents, edesc->sec4_sg + sec4_sg_src_index, 0); - if (*next_buflen) - scatterwalk_map_and_copy(next_buf, req->src, - to_hash - *buflen, - *next_buflen, 0); - } else { + else sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1); - } + if (*next_buflen) + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); desc = edesc->hw_desc; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, -- GitLab From 1017e0987117c32783ba7c10fe2e7ff1456ba1dc Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 26 Mar 2019 18:22:16 +0100 Subject: [PATCH 0288/1861] vrf: prevent adding upper devices VRF devices don't work with upper devices. Currently, it's possible to add a VRF device to a bridge or team, and to create macvlan, macsec, or ipvlan devices on top of a VRF (bond and vlan are prevented respectively by the lack of an ndo_set_mac_address op and the NETIF_F_VLAN_CHALLENGED feature flag). Fix this by setting the IFF_NO_RX_HANDLER flag (introduced in commit f5426250a6ec ("net: introduce IFF_NO_RX_HANDLER")). Cc: David Ahern Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: Sabrina Dubroca Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7c1430ed02445..6d1a1abbed27e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1273,6 +1273,7 @@ static void vrf_setup(struct net_device *dev) /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_NO_RX_HANDLER; dev->min_mtu = 0; dev->max_mtu = 0; -- GitLab From c8b1917c8987a6fa3695d479b4d60fbbbc3e537b Mon Sep 17 00:00:00 2001 From: Furquan Shaikh Date: Wed, 20 Mar 2019 15:28:44 -0700 Subject: [PATCH 0289/1861] ACPICA: Clear status of GPEs before enabling them Commit 18996f2db918 ("ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume") was added to stop clearing event status bits unconditionally in the system-wide suspend and resume paths. This was done because of an issue with a laptop lid appaering to be closed even when it was used to wake up the system from suspend (see https://bugzilla.kernel.org/show_bug.cgi?id=196249), which happened because event status bits were cleared unconditionally on system resume. Though this change fixed the issue in the resume path, it introduced regressions in a few suspend paths. First regression was reported and fixed in the S5 entry path by commit fa85015c0d95 ("ACPICA: Clear status of all events when entering S5"). Next regression was reported and fixed for all legacy sleep paths by commit f317c7dc12b7 ("ACPICA: Clear status of all events when entering sleep states"). 
However, there still is a suspend-to-idle regression, since suspend-to-idle does not follow the legacy sleep paths. In the suspend-to-idle case, wakeup is enabled as part of device suspend. If the status bits of wakeup GPEs are set when they are enabled, it causes a premature system wakeup to occur. To address that problem, partially revert commit 18996f2db918 to restore GPE status bits clearing before the GPE is enabled in acpi_ev_enable_gpe(). Fixes: 18996f2db918 ("ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume") Signed-off-by: Furquan Shaikh Cc: 4.17+ # 4.17+ [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/evgpe.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 62d3aa74277b4..5e9d7348c16f7 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -81,8 +81,12 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) ACPI_FUNCTION_TRACE(ev_enable_gpe); - /* Enable the requested GPE */ + /* Clear the GPE status */ + status = acpi_hw_clear_gpe(gpe_event_info); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + /* Enable the requested GPE */ status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); return_ACPI_STATUS(status); } -- GitLab From 4d720e2a8c5f5829ca6d79e02f653ca8b1470b8b Mon Sep 17 00:00:00 2001 From: Thomas Preston Date: Mon, 25 Mar 2019 16:53:38 +0000 Subject: [PATCH 0290/1861] Documentation: acpi: Add an example for PRP0001 Add an example for the magic PRP0001 device ID which allows matching ACPI devices against drivers using OF Device Tree compatible property. Signed-off-by: Thomas Preston Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/enumeration.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index 7bcf9c3d9fbe2..1395b844649cc 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -410,6 +410,32 @@ Specifically, the device IDs returned by _HID and preceding PRP0001 in the _CID return package will be checked first. Also in that case the bus type the device will be enumerated to depends on the device ID returned by _HID. +For example, the following ACPI sample might be used to enumerate an lm75-type +I2C temperature sensor and match it to the driver using the Device Tree +namespace link: + + Device (TMP0) + { + Name (_HID, "PRP0001") + Name (_DSD, Package() { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package (2) { "compatible", "ti,tmp75" }, + } + }) + Method (_CRS, 0, Serialized) + { + Name (SBUF, ResourceTemplate () + { + I2cSerialBusV2 (0x48, ControllerInitiated, + 400000, AddressingMode7Bit, + "\\_SB.PCI0.I2C1", 0x00, + ResourceConsumer, , Exclusive,) + }) + Return (SBUF) + } + } + It is valid to define device objects with a _HID returning PRP0001 and without the "compatible" property in the _DSD or a _CID as long as one of their ancestors provides a _DSD with a valid "compatible" property. Such device -- GitLab From e7dfb6d04e4715be1f3eb2c60d97b753fd2e4516 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Mon, 11 Mar 2019 08:57:42 +0100 Subject: [PATCH 0291/1861] ARM: dts: at91: Fix typo in ISC_D0 on PC9 The function argument for the ISC_D0 on PC9 was incorrect. According to the documentation it should be 'C' aka 3. 
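For readers decoding the fix: in the PINMUX_PIN(pin, func, ioset) triples quoted below, the second argument selects the pin function, with 0 meaning GPIO and peripherals A, B, C, ... mapping to 1, 2, 3, ...; hence 'C' aka 3. The neighbouring PC9 definitions from the hunk's context lines make the pattern visible (comments added here):

	#define PIN_PC9__GPIO     PINMUX_PIN(PIN_PC9, 0, 0)	/* func 0: GPIO */
	#define PIN_PC9__FIQ      PINMUX_PIN(PIN_PC9, 1, 3)	/* func 1: peripheral A */
	#define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1)	/* func 2: peripheral B */
	#define PIN_PC9__ISC_D0   PINMUX_PIN(PIN_PC9, 3, 1)	/* func 3: peripheral C (the fix) */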
Signed-off-by: David Engraf Reviewed-by: Nicolas Ferre Signed-off-by: Ludovic Desroches Fixes: 7f16cb676c00 ("ARM: at91/dt: add sama5d2 pinmux") Cc: # v4.4+ --- arch/arm/boot/dts/sama5d2-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h index 1c01a6f843d8a..28a2e45752fea 100644 --- a/arch/arm/boot/dts/sama5d2-pinfunc.h +++ b/arch/arm/boot/dts/sama5d2-pinfunc.h @@ -518,7 +518,7 @@ #define PIN_PC9__GPIO PINMUX_PIN(PIN_PC9, 0, 0) #define PIN_PC9__FIQ PINMUX_PIN(PIN_PC9, 1, 3) #define PIN_PC9__GTSUCOMP PINMUX_PIN(PIN_PC9, 2, 1) -#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 2, 1) +#define PIN_PC9__ISC_D0 PINMUX_PIN(PIN_PC9, 3, 1) #define PIN_PC9__TIOA4 PINMUX_PIN(PIN_PC9, 4, 2) #define PIN_PC10 74 #define PIN_PC10__GPIO PINMUX_PIN(PIN_PC10, 0, 0) -- GitLab From 7a8e61f8478639072d402a26789055a4a4de8f77 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Mar 2019 11:36:19 +0100 Subject: [PATCH 0292/1861] timekeeping: Force upper bound for setting CLOCK_REALTIME Several people reported testing failures after setting CLOCK_REALTIME close to the limits of the kernel internal representation in nanoseconds, i.e. year 2262. The failures are exposed in subsequent operations, i.e. when arming timers or when the advancing CLOCK_MONOTONIC makes the calculation of CLOCK_REALTIME overflow into negative space. Now people start to paper over the underlying problem by clamping calculations to the valid range, but that's just wrong because such workarounds will prevent detection of real issues as well. It is reasonable to force an upper bound for the various methods of setting CLOCK_REALTIME. Year 2262 is the absolute upper bound. Assume a maximum uptime of 30 years which is plenty enough even for esoteric embedded systems. That results in an upper bound of year 2232 for setting the time. Once that limit is reached in reality this limit is only a small part of the problem space. But until then this stops people from trying to paper over the problem at the wrong places. Reported-by: Xiongfeng Wang Reported-by: Hongbo Yao Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Cc: Miroslav Lichvar Cc: Arnd Bergmann Cc: Richard Cochran Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1903231125480.2157@nanos.tec.linutronix.de --- include/linux/time64.h | 21 +++++++++++++++++++++ kernel/time/time.c | 2 +- kernel/time/timekeeping.c | 6 +++--- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/linux/time64.h b/include/linux/time64.h index f38d382ffec13..a620ee610b9f3 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -33,6 +33,17 @@ struct itimerspec64 { #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) +/* + * Limits for settimeofday(): + * + * To prevent setting the time close to the wraparound point time setting + * is limited so a reasonable uptime can be accomodated. Uptime of 30 years + * should be really sufficient, which means the cutoff is 2232. At that + * point the cutoff is just a small part of the larger problem. 
+ */ +#define TIME_UPTIME_SEC_MAX (30LL * 365 * 24 *3600) +#define TIME_SETTOD_SEC_MAX (KTIME_SEC_MAX - TIME_UPTIME_SEC_MAX) + static inline int timespec64_equal(const struct timespec64 *a, const struct timespec64 *b) { @@ -100,6 +111,16 @@ static inline bool timespec64_valid_strict(const struct timespec64 *ts) return true; } +static inline bool timespec64_valid_settod(const struct timespec64 *ts) +{ + if (!timespec64_valid(ts)) + return false; + /* Disallow values which cause overflow issues vs. CLOCK_REALTIME */ + if ((unsigned long long)ts->tv_sec >= TIME_SETTOD_SEC_MAX) + return false; + return true; +} + /** * timespec64_to_ns - Convert timespec64 to nanoseconds * @ts: pointer to the timespec64 variable to be converted diff --git a/kernel/time/time.c b/kernel/time/time.c index c3f756f8534bb..86656bbac232e 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -171,7 +171,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz static int firsttime = 1; int error = 0; - if (tv && !timespec64_valid(tv)) + if (tv && !timespec64_valid_settod(tv)) return -EINVAL; error = security_settime64(tv, tz); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 540145da33dac..5716e28bfa3cc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1221,7 +1221,7 @@ int do_settimeofday64(const struct timespec64 *ts) unsigned long flags; int ret = 0; - if (!timespec64_valid_strict(ts)) + if (!timespec64_valid_settod(ts)) return -EINVAL; raw_spin_lock_irqsave(&timekeeper_lock, flags); @@ -1278,7 +1278,7 @@ static int timekeeping_inject_offset(const struct timespec64 *ts) /* Make sure the proposed value is valid */ tmp = timespec64_add(tk_xtime(tk), *ts); if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 || - !timespec64_valid_strict(&tmp)) { + !timespec64_valid_settod(&tmp)) { ret = -EINVAL; goto error; } @@ -1527,7 +1527,7 @@ void __init timekeeping_init(void) unsigned long flags; read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); - if (timespec64_valid_strict(&wall_time) && + if (timespec64_valid_settod(&wall_time) && timespec64_to_ns(&wall_time) > 0) { persistent_clock_exists = true; } else if (timespec64_to_ns(&wall_time) != 0) { -- GitLab From 157c99c5a2956a9ab1ae12de0136a2d8a1b1a307 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 26 Mar 2019 15:04:14 +0800 Subject: [PATCH 0293/1861] mmc: alcor: don't write data before command has completed The alcor driver is setting up data transfer and submitting the associated MMC command at the same time. While this works most of the time, it occasionally causes problems upon write. In the working case, after setting up the data transfer and submitting the MMC command, an interrupt comes in a moment later with CMD_END and WRITE_BUF_RDY bits set. The data transfer then happens without problem. However, on occasion, the interrupt that arrives at that point only has WRITE_BUF_RDY set. The hardware notifies that it's ready to write data, but the associated MMC command is still running. Regardless, the driver was proceeding to write data immediately, and that would then cause another interrupt indicating data CRC error, and the write would fail. Additionally, the transfer setup function alcor_trigger_data_transfer() was being called 3 times for each write operation, which was confusing and may be contributing to this issue. Solve this by tweaking the driver behaviour to follow the sequence observed in the original ampe_stor vendor driver: 1. 
When starting request handling, write 0 to DATA_XFER_CTRL 2. Submit the command 3. Wait for CMD_END interrupt and then trigger data transfer 4. For the PIO case, trigger the next step of the data transfer only upon the following DATA_END interrupt, which occurs after the block has been written. I confirmed that the read path still works (DMA & PIO) and also now presents more consistency with the operations performed by ampe_stor. Signed-off-by: Daniel Drake Fixes: c5413ad815a6 ("mmc: add new Alcor Micro Cardreader SD/MMC driver") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/alcor.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c index 82a97866e0cf4..7c8f203f9a24d 100644 --- a/drivers/mmc/host/alcor.c +++ b/drivers/mmc/host/alcor.c @@ -48,7 +48,6 @@ struct alcor_sdmmc_host { struct mmc_command *cmd; struct mmc_data *data; unsigned int dma_on:1; - unsigned int early_data:1; struct mutex cmd_mutex; @@ -144,8 +143,7 @@ static void alcor_data_set_dma(struct alcor_sdmmc_host *host) host->sg_count--; } -static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host, - bool early) +static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host) { struct alcor_pci_priv *priv = host->alcor_pci; struct mmc_data *data = host->data; @@ -155,13 +153,6 @@ static void alcor_trigger_data_transfer(struct alcor_sdmmc_host *host, ctrl |= AU6601_DATA_WRITE; if (data->host_cookie == COOKIE_MAPPED) { - if (host->early_data) { - host->early_data = false; - return; - } - - host->early_data = early; - alcor_data_set_dma(host); ctrl |= AU6601_DATA_DMA_MODE; host->dma_on = 1; @@ -231,6 +222,7 @@ static void alcor_prepare_sg_miter(struct alcor_sdmmc_host *host) static void alcor_prepare_data(struct alcor_sdmmc_host *host, struct mmc_command *cmd) { + struct alcor_pci_priv *priv = host->alcor_pci; struct mmc_data *data = cmd->data; if (!data) @@ -248,7 +240,7 @@ static void alcor_prepare_data(struct alcor_sdmmc_host *host, if (data->host_cookie != COOKIE_MAPPED) alcor_prepare_sg_miter(host); - alcor_trigger_data_transfer(host, true); + alcor_write8(priv, 0, AU6601_DATA_XFER_CTRL); } static void alcor_send_cmd(struct alcor_sdmmc_host *host, @@ -435,7 +427,7 @@ static int alcor_cmd_irq_done(struct alcor_sdmmc_host *host, u32 intmask) if (!host->data) return false; - alcor_trigger_data_transfer(host, false); + alcor_trigger_data_transfer(host); host->cmd = NULL; return true; } @@ -456,7 +448,7 @@ static void alcor_cmd_irq_thread(struct alcor_sdmmc_host *host, u32 intmask) if (!host->data) alcor_request_complete(host, 1); else - alcor_trigger_data_transfer(host, false); + alcor_trigger_data_transfer(host); host->cmd = NULL; } @@ -487,15 +479,9 @@ static int alcor_data_irq_done(struct alcor_sdmmc_host *host, u32 intmask) break; case AU6601_INT_READ_BUF_RDY: alcor_trf_block_pio(host, true); - if (!host->blocks) - break; - alcor_trigger_data_transfer(host, false); return 1; case AU6601_INT_WRITE_BUF_RDY: alcor_trf_block_pio(host, false); - if (!host->blocks) - break; - alcor_trigger_data_transfer(host, false); return 1; case AU6601_INT_DMA_END: if (!host->sg_count) @@ -508,8 +494,14 @@ static int alcor_data_irq_done(struct alcor_sdmmc_host *host, u32 intmask) break; } - if (intmask & AU6601_INT_DATA_END) - return 0; + if (intmask & AU6601_INT_DATA_END) { + if (!host->dma_on && host->blocks) { + alcor_trigger_data_transfer(host); + return 1; + } else { + return 0; + } + } return 
1; } -- GitLab From 379e2014c95b7a454713da822b8ef4ec51ab8a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 27 Mar 2019 14:51:13 +0100 Subject: [PATCH 0294/1861] libbpf: add xsk.h to install_headers target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xsk.h header file was missing from the install_headers target in the Makefile. This patch simply adds xsk.h to the set of installed headers. Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Reported-by: Bruce Richardson Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5bf8e52c41fca..279589c299836 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -222,6 +222,7 @@ install_headers: $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,xsk.h,$(prefix)/include/bpf,644); install: install_lib -- GitLab From 89dedaef49d36adc2bb5e7e4c38b52fa3013c7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 27 Mar 2019 14:51:14 +0100 Subject: [PATCH 0295/1861] libbpf: add libelf dependency to shared library build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DPDK project is moving forward with its AF_XDP PMD, and during that process some libbpf issues surfaced [1]: When libbpf was built as a shared library, libelf was not included in the linking phase. Since libelf is an internal dependency of libbpf, libelf should be included. This patch adds '-lelf' to resolve that. [1] https://patches.dpdk.org/patch/50704/#93571 Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature check") Suggested-by: Luca Boccassi Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 279589c299836..7beec4d5b5225 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -177,7 +177,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) -- GitLab From 8543e437807970166c2b66b79935c9f4b0e6d1f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Mar 2019 16:44:28 +0100 Subject: [PATCH 0296/1861] bpf, libbpf: fix quiet install_headers Both the btf.h and xsk.h headers are not installed quietly due to a missing '\' for the call to QUIET_INSTALL. Let's fix it. Before: # make install_headers INSTALL headers if [ ! 
-d '''/usr/local/include/bpf' ]; then install -d -m 755 '''/usr/local/include/bpf'; fi; install xsk.h -m 644 '''/usr/local/include/bpf'; # ls /usr/local/include/bpf/ bpf.h btf.h libbpf.h xsk.h After: # make install_headers INSTALL headers # ls /usr/local/include/bpf/ bpf.h btf.h libbpf.h xsk.h Fixes: a493f5f9d8c2 ("libbpf: Install btf.h with libbpf") Fixes: 379e2014c95b ("libbpf: add xsk.h to install_headers target") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Andrii Nakryiko --- tools/lib/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 7beec4d5b5225..8e7c56e9590fb 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -220,8 +220,8 @@ install_lib: all_cmd install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf.h,$(prefix)/include/bpf,644); - $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); install: install_lib -- GitLab From f35f06c35560a86e841631f0243b83a984dc11a9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 26 Mar 2019 10:49:56 +0000 Subject: [PATCH 0297/1861] Btrfs: do not allow trimming when a fs is mounted with the nologreplay option When a filesystem is mounted with the nologreplay mount option, which requires it to be mounted in RO mode as well, we can not allow discard on free space inside block groups, because log trees refer to extents that are not pinned in a block group's free space cache (pinning the extents is precisely the first phase of replaying a log tree). So do not allow the fitrim ioctl to do anything when the filesystem is mounted with the nologreplay option, because later it can be mounted RW without that option, which causes log replay to happen and result in either a failure to replay the log trees (leading to a mount failure), a crash or some silent corruption. Reported-by: Darrick J. Wong Fixes: 96da09192cda ("btrfs: Introduce new mount option to disable tree log replay") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ec2d8919e7fb0..cd4e693406a0e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -501,6 +501,16 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + /* + * If the fs is mounted with nologreplay, which requires it to be + * mounted in RO mode as well, we can not allow discard on free space + * inside block groups, because log trees refer to extents that are not + * pinned in a block group's free space cache (pinning the extents is + * precisely the first phase of replaying a log tree). + */ + if (btrfs_test_opt(fs_info, NOLOGREPLAY)) + return -EROFS; + rcu_read_lock(); list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, dev_list) { -- GitLab From ab8a6d821179ab9bea1a9179f535ccba6330c1ed Mon Sep 17 00:00:00 2001 From: Chong Qiao Date: Thu, 28 Mar 2019 07:08:01 +0800 Subject: [PATCH 0298/1861] MIPS: KGDB: fix kgdb support for SMP platforms. kgdb_call_nmi_hook() is called on the other CPUs through an SMP call.
On MIPS, an SMP call is processed in the IPI IRQ handler, and regs are saved in handle_int. So kgdb_call_nmi_hook() gets regs via get_irq_regs() and passes them on to kgdb_cpu_enter(). Signed-off-by: Chong Qiao Reviewed-by: Douglas Anderson Acked-by: Daniel Thompson Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: Will Deacon Cc: Christophe Leroy Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: QiaoChong --- arch/mips/kernel/kgdb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 6e574c02e4c3b..ea781b29f7f17 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -33,6 +33,7 @@ #include #include #include +#include static struct hard_trap_info { unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ @@ -214,7 +215,7 @@ void kgdb_call_nmi_hook(void *ignored) old_fs = get_fs(); set_fs(KERNEL_DS); - kgdb_nmicallback(raw_smp_processor_id(), NULL); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); set_fs(old_fs); } -- GitLab From e4952b0c2c0309bdbfc4c6cc0dd81e37450d74d0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 28 Mar 2019 14:37:45 +0100 Subject: [PATCH 0299/1861] MIPS: SGI-IP27: Fix use of unchecked pointer in shutdown_bridge_irq smatch complaint: arch/mips/sgi-ip27/ip27-irq.c:123 shutdown_bridge_irq() warn: variable dereferenced before check 'hd' (see line 121) Fix it by removing the local variable and using hd->pin directly. Fixes: 69a07a41d908 ("MIPS: SGI-IP27: rework HUB interrupts") Reported-by: Dan Carpenter Signed-off-by: Thomas Bogendoerfer Reviewed-by: Mukesh Ojha Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/sgi-ip27/ip27-irq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 710a59764b01c..a32f843cdbe02 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -118,7 +118,6 @@ static void shutdown_bridge_irq(struct irq_data *d) { struct hub_irq_data *hd = irq_data_get_irq_chip_data(d); struct bridge_controller *bc; - int pin = hd->pin; if (!hd) return; @@ -126,7 +125,7 @@ static void shutdown_bridge_irq(struct irq_data *d) disable_hub_irq(d); bc = hd->bc; - bridge_clr(bc, b_int_enable, (1 << pin)); + bridge_clr(bc, b_int_enable, (1 << hd->pin)); bridge_read(bc, b_wid_tflush); } -- GitLab From 6e57d72a84db9f2e565992f76b6a6bc907f13b77 Mon Sep 17 00:00:00 2001 From: xiaofeis Date: Wed, 27 Mar 2019 11:59:06 +0800 Subject: [PATCH 0300/1861] net: dsa: Implement flow_dissect callback for tag_qca Add flow_dissect for qca tagged packets to get the right hash. Signed-off-by: Xiaofei Shen Reviewed-by: Andrew Lunn Reviewed-by: Vinod Koul Reviewed-by: Florian Fainelli Signed-off-by: David S.
Miller --- net/dsa/tag_qca.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index ed4f6dc26365b..85c22ada47449 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -98,8 +98,18 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, return skb; } +static int qca_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = QCA_HDR_LEN; + *proto = ((__be16 *)skb->data)[0]; + + return 0; +} + const struct dsa_device_ops qca_netdev_ops = { .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, + .flow_dissect = qca_tag_flow_dissect, .overhead = QCA_HDR_LEN, }; -- GitLab From 6289d0facd9ebce4cc83e5da39e15643ee998dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 27 Mar 2019 15:26:01 +0100 Subject: [PATCH 0301/1861] qmi_wwan: add Olicard 600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Qualcomm based device with a QMI function on interface 4. It is mode switched from 2020:2030 using a standard eject message. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2031 Rev= 2.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 74bebbdb4b158..9195f3476b1d7 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1203,6 +1203,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ + {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ -- GitLab From 355b98553789b646ed97ad801a619ff898471b92 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Mar 2019 08:21:30 -0700 Subject: [PATCH 0302/1861] netns: provide pure entropy for net_hash_mix() net_hash_mix() currently uses kernel address of a struct net, and is used in many places that could be used to reveal this address to a patient attacker, thus defeating KASLR, for the typical case (initial net namespace, &init_net is not dynamically allocated) I believe the original implementation tried to avoid spending too many cycles in this function, but security comes first. Also provide entropy regardless of CONFIG_NET_NS. Fixes: 0b4419162aa6 ("netns: introduce the net_hash_mix "salt" for hashes") Signed-off-by: Eric Dumazet Reported-by: Amit Klein Reported-by: Benny Pinkas Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 + include/net/netns/hash.h | 10 ++-------- net/core/net_namespace.c | 1 + 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a68ced28d8f47..12689ddfc24c4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -59,6 +59,7 @@ struct net { */ spinlock_t rules_mod_lock; + u32 hash_mix; atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h index 16a842456189f..d9b665151f3d9 100644 --- a/include/net/netns/hash.h +++ b/include/net/netns/hash.h @@ -2,16 +2,10 @@ #ifndef __NET_NS_HASH_H__ #define __NET_NS_HASH_H__ -#include - -struct net; +#include static inline u32 net_hash_mix(const struct net *net) { -#ifdef CONFIG_NET_NS - return (u32)(((unsigned long)net) >> ilog2(sizeof(*net))); -#else - return 0; -#endif + return net->hash_mix; } #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 17f36317363d1..7e6dcc6257011 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->count, 1); refcount_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); -- GitLab From c8ba5b91a04e3e2643e48501c114108802f21cda Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Mar 2019 11:38:38 -0700 Subject: [PATCH 0303/1861] nfp: validate the return code from dev_queue_xmit() dev_queue_xmit() may return error codes as well as netdev_tx_t, and it always consumes the skb. Make sure we always return a correct netdev_tx_t value. 
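As an aside, the corrected pattern can be sketched in isolation (a simplified illustration derived from the diff below, not a verbatim copy of the driver; nfp_repr_inc_tx_stats is the real helper, the rest is condensed):

	static netdev_tx_t repr_xmit_sketch(struct sk_buff *skb,
					    struct net_device *netdev)
	{
		unsigned int len = skb->len;	/* read before the skb is consumed */
		int ret;

		/* dev_queue_xmit() always consumes the skb and returns
		 * NET_XMIT_* codes or a negative errno, none of which are
		 * valid netdev_tx_t values */
		ret = dev_queue_xmit(skb);
		nfp_repr_inc_tx_stats(netdev, len, ret);

		/* the skb cannot be retried here, so report it as sent */
		return NETDEV_TX_OK;
	}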
Fixes: eadfa4c3be99 ("nfp: add stats and xmit helpers for representors") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index d2c803bb4e562..7b46fce2e81e0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -195,7 +195,7 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) ret = dev_queue_xmit(skb); nfp_repr_inc_tx_stats(netdev, len, ret); - return ret; + return NETDEV_TX_OK; } static int nfp_repr_stop(struct net_device *netdev) -- GitLab From c3e1f7fff69c78169c8ac40cc74ac4307f74e36d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Mar 2019 11:38:39 -0700 Subject: [PATCH 0304/1861] nfp: disable netpoll on representors NFP reprs are software devices on top of the PF's vNIC. The comment above __dev_queue_xmit() sayeth: When calling this method, interrupts MUST be enabled. This is because the BH enable code must have IRQs enabled so that it will not deadlock. For netconsole we can't guarantee IRQ state, let's just disable netpoll on representors to be on the safe side. When the initial implementation of NFP reprs was added by the commit 5de73ee46704 ("nfp: general representor implementation"), .ndo_poll_controller was required for netpoll to be enabled. Fixes: ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 7b46fce2e81e0..94d228c044963 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -383,7 +383,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS; - netdev->priv_flags |= IFF_NO_QUEUE; + netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; netdev->features |= NETIF_F_LLTX; if (nfp_app_has_tc(app)) { -- GitLab From f28cd2af22a0c134e4aa1c64a70f70d815d473fb Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 28 Mar 2019 07:36:00 +0100 Subject: [PATCH 0305/1861] openvswitch: fix flow actions reallocation The flow action buffer can be resized if it's not big enough to contain all the requested flow actions. However, this resize doesn't take into account the newly requested size; the buffer is only increased by a factor of 2x. This might not be enough to contain the new data, causing a buffer overflow, for example: [ 42.044472] ============================================================================= [ 42.045608] BUG kmalloc-96 (Not tainted): Redzone overwritten [ 42.046415] ----------------------------------------------------------------------------- [ 42.047715] Disabling lock debugging due to kernel taint [ 42.047716] INFO: 0x8bf2c4a5-0x720c0928.
First byte 0x0 instead of 0xcc [ 42.048677] INFO: Slab 0xbc6d2040 objects=29 used=18 fp=0xdc07dec4 flags=0x2808101 [ 42.049743] INFO: Object 0xd53a3464 @offset=2528 fp=0xccdcdebb [ 42.050747] Redzone 76f1b237: cc cc cc cc cc cc cc cc ........ [ 42.051839] Object d53a3464: 6b 6b 6b 6b 6b 6b 6b 6b 0c 00 00 00 6c 00 00 00 kkkkkkkk....l... [ 42.053015] Object f49a30cc: 6c 00 0c 00 00 00 00 00 00 00 00 03 78 a3 15 f6 l...........x... [ 42.054203] Object acfe4220: 20 00 02 00 ff ff ff ff 00 00 00 00 00 00 00 00 ............... [ 42.055370] Object 21024e91: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 42.056541] Object 070e04c3: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 42.057797] Object 948a777a: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [ 42.059061] Redzone 8bf2c4a5: 00 00 00 00 .... [ 42.060189] Padding a681b46e: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Fix by making sure the new buffer is properly resized to contain all the requested data. BugLink: https://bugs.launchpad.net/bugs/1813244 Signed-off-by: Andrea Righi Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 691da853bef5c..4bdf5e3ac2087 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2306,14 +2306,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, struct sw_flow_actions *acts; int new_acts_size; - int req_size = NLA_ALIGN(attr_len); + size_t req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; - new_acts_size = ksize(*sfa) * 2; + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { -- GitLab From cb66ddd156203daefb8d71158036b27b0e2caf63 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 28 Mar 2019 17:10:56 +0800 Subject: [PATCH 0306/1861] net: rds: force to destroy connection if t_sock is NULL in rds_tcp_kill_sock(). When a net namespace is cleaned up, rds_tcp_exit_net() calls rds_tcp_kill_sock(). If t_sock is NULL, it does not call rds_conn_destroy(), rds_conn_path_destroy() and rds_tcp_conn_free() to free the connection, and the worker cp_conn_w is not stopped; afterwards the net is freed in net_drop_ns(), while the cp_conn_w worker rds_connect_worker() still calls rds_tcp_conn_path_connect() and references the 'net' which has already been freed. In rds_tcp_conn_path_connect(), rds_tcp_set_callbacks() sets t_sock = sock before sock->ops->connect, but if connect() fails, it calls rds_tcp_restore_callbacks() and sets t_sock = NULL. If connect() keeps failing, rds_connect_worker() keeps trying to reconnect, so rds_tcp_kill_sock() never gets to cancel the worker cp_conn_w and free the connections. Therefore, the condition !tc->t_sock is not needed in the cleanup_net->rds_tcp_exit_net->rds_tcp_kill_sock path, because tc->t_sock is always NULL there, and there is no other path to cancel cp_conn_w and free the connection. So this patch fixes this. rds_tcp_kill_sock(): ... if (net != c_net || !tc->t_sock) ...
Acked-by: Santosh Shilimkar ================================================================== BUG: KASAN: use-after-free in inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 Read of size 4 at addr ffff8003496a4684 by task kworker/u8:4/3721 CPU: 3 PID: 3721 Comm: kworker/u8:4 Not tainted 5.1.0 #11 Hardware name: linux,dummy-virt (DT) Workqueue: krdsd rds_connect_worker Call trace: dump_backtrace+0x0/0x3c0 arch/arm64/kernel/time.c:53 show_stack+0x28/0x38 arch/arm64/kernel/traps.c:152 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x120/0x188 lib/dump_stack.c:113 print_address_description+0x68/0x278 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x21c/0x348 mm/kasan/report.c:409 __asan_report_load4_noabort+0x30/0x40 mm/kasan/report.c:429 inet_create+0xbcc/0xd28 net/ipv4/af_inet.c:340 __sock_create+0x4f8/0x770 net/socket.c:1276 sock_create_kern+0x50/0x68 net/socket.c:1322 rds_tcp_conn_path_connect+0x2b4/0x690 net/rds/tcp_connect.c:114 rds_connect_worker+0x108/0x1d0 net/rds/threads.c:175 process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 kthread+0x2f0/0x378 kernel/kthread.c:255 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 Allocated by task 687: save_stack mm/kasan/kasan.c:448 [inline] set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xd4/0x180 mm/kasan/kasan.c:553 kasan_slab_alloc+0x14/0x20 mm/kasan/kasan.c:490 slab_post_alloc_hook mm/slab.h:444 [inline] slab_alloc_node mm/slub.c:2705 [inline] slab_alloc mm/slub.c:2713 [inline] kmem_cache_alloc+0x14c/0x388 mm/slub.c:2718 kmem_cache_zalloc include/linux/slab.h:697 [inline] net_alloc net/core/net_namespace.c:384 [inline] copy_net_ns+0xc4/0x2d0 net/core/net_namespace.c:424 create_new_namespaces+0x300/0x658 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xa0/0x198 kernel/nsproxy.c:206 ksys_unshare+0x340/0x628 kernel/fork.c:2577 __do_sys_unshare kernel/fork.c:2645 [inline] __se_sys_unshare kernel/fork.c:2643 [inline] __arm64_sys_unshare+0x38/0x58 kernel/fork.c:2643 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common+0x168/0x390 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0x60/0xd0 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:960 Freed by task 264: save_stack mm/kasan/kasan.c:448 [inline] set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x114/0x220 mm/kasan/kasan.c:521 kasan_slab_free+0x10/0x18 mm/kasan/kasan.c:528 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kmem_cache_free+0xb8/0x3a8 mm/slub.c:2968 net_free net/core/net_namespace.c:400 [inline] net_drop_ns.part.6+0x78/0x90 net/core/net_namespace.c:407 net_drop_ns net/core/net_namespace.c:406 [inline] cleanup_net+0x53c/0x6d8 net/core/net_namespace.c:569 process_one_work+0x6e8/0x1700 kernel/workqueue.c:2153 worker_thread+0x3b0/0xdd0 kernel/workqueue.c:2296 kthread+0x2f0/0x378 kernel/kthread.c:255 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1117 The buggy address belongs to the object at ffff8003496a3f80 which belongs to the cache net_namespace of size 7872 The buggy address is located 1796 bytes inside of 7872-byte region [ffff8003496a3f80, ffff8003496a5e40) The buggy address belongs to the page: page:ffff7e000d25a800 count:1 mapcount:0 mapping:ffff80036ce4b000 index:0x0 compound_mapcount: 0 flags: 0xffffe0000008100(slab|head) raw: 0ffffe0000008100 dead000000000100 dead000000000200 
ffff80036ce4b000 raw: 0000000000000000 0000000080040004 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8003496a4580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8003496a4600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8003496a4680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8003496a4700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8003496a4780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 467fa15356ac("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Reported-by: Hulk Robot Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- net/rds/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fd26941746074..faf726e00e27c 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -608,7 +608,7 @@ static void rds_tcp_kill_sock(struct net *net) list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); - if (net != c_net || !tc->t_sock) + if (net != c_net) continue; if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { list_move_tail(&tc->t_tcp_node, &tmp_list); -- GitLab From 9a5a90d167b0e5fe3d47af16b68fd09ce64085cd Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 28 Mar 2019 18:23:04 +0300 Subject: [PATCH 0307/1861] net: core: netif_receive_skb_list: unlist skb before passing to pt->func __netif_receive_skb_list_ptype() leaves skb->next poisoned before passing it to pt_prev->func handler, what may produce (in certain cases, e.g. DSA setup) crashes like: [ 88.606777] CPU 0 Unable to handle kernel paging request at virtual address 0000000e, epc == 80687078, ra == 8052cc7c [ 88.618666] Oops[#1]: [ 88.621196] CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc2-dlink-00206-g4192a172-dirty #1473 [ 88.630885] $ 0 : 00000000 10000400 00000002 864d7850 [ 88.636709] $ 4 : 87c0ddf0 864d7800 87c0ddf0 00000000 [ 88.642526] $ 8 : 00000000 49600000 00000001 00000001 [ 88.648342] $12 : 00000000 c288617b dadbee27 25d17c41 [ 88.654159] $16 : 87c0ddf0 85cff080 80790000 fffffffd [ 88.659975] $20 : 80797b20 ffffffff 00000001 864d7800 [ 88.665793] $24 : 00000000 8011e658 [ 88.671609] $28 : 80790000 87c0dbc0 87cabf00 8052cc7c [ 88.677427] Hi : 00000003 [ 88.680622] Lo : 7b5b4220 [ 88.683840] epc : 80687078 vlan_dev_hard_start_xmit+0x1c/0x1a0 [ 88.690532] ra : 8052cc7c dev_hard_start_xmit+0xac/0x188 [ 88.696734] Status: 10000404 IEp [ 88.700422] Cause : 50000008 (ExcCode 02) [ 88.704874] BadVA : 0000000e [ 88.708069] PrId : 0001a120 (MIPS interAptiv (multi)) [ 88.713005] Modules linked in: [ 88.716407] Process swapper (pid: 0, threadinfo=(ptrval), task=(ptrval), tls=00000000) [ 88.725219] Stack : 85f61c28 00000000 0000000e 80780000 87c0ddf0 85cff080 80790000 8052cc7c [ 88.734529] 87cabf00 00000000 00000001 85f5fb40 807b0000 864d7850 87cabf00 807d0000 [ 88.743839] 864d7800 8655f600 00000000 85cff080 87c1c000 0000006a 00000000 8052d96c [ 88.753149] 807a0000 8057adb8 87c0dcc8 87c0dc50 85cfff08 00000558 87cabf00 85f58c50 [ 88.762460] 00000002 85f58c00 864d7800 80543308 fffffff4 00000001 85f58c00 864d7800 [ 88.771770] ... 
[ 88.774483] Call Trace: [ 88.777199] [<80687078>] vlan_dev_hard_start_xmit+0x1c/0x1a0 [ 88.783504] [<8052cc7c>] dev_hard_start_xmit+0xac/0x188 [ 88.789326] [<8052d96c>] __dev_queue_xmit+0x6e8/0x7d4 [ 88.794955] [<805a8640>] ip_finish_output2+0x238/0x4d0 [ 88.800677] [<805ab6a0>] ip_output+0xc8/0x140 [ 88.805526] [<805a68f4>] ip_forward+0x364/0x560 [ 88.810567] [<805a4ff8>] ip_rcv+0x48/0xe4 [ 88.815030] [<80528d44>] __netif_receive_skb_one_core+0x44/0x58 [ 88.821635] [<8067f220>] dsa_switch_rcv+0x108/0x1ac [ 88.827067] [<80528f80>] __netif_receive_skb_list_core+0x228/0x26c [ 88.833951] [<8052ed84>] netif_receive_skb_list+0x1d4/0x394 [ 88.840160] [<80355a88>] lunar_rx_poll+0x38c/0x828 [ 88.845496] [<8052fa78>] net_rx_action+0x14c/0x3cc [ 88.850835] [<806ad300>] __do_softirq+0x178/0x338 [ 88.856077] [<8012a2d4>] irq_exit+0xbc/0x100 [ 88.860846] [<802f8b70>] plat_irq_dispatch+0xc0/0x144 [ 88.866477] [<80105974>] handle_int+0x14c/0x158 [ 88.871516] [<806acfb0>] r4k_wait+0x30/0x40 [ 88.876462] Code: afb10014 8c8200a0 00803025 <9443000c> 94a20468 00000000 10620042 00a08025 9605046a [ 88.887332] [ 88.888982] ---[ end trace eb863d007da11cf1 ]--- [ 88.894122] Kernel panic - not syncing: Fatal exception in interrupt [ 88.901202] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fix this by pulling skb off the sublist and zeroing skb->next pointer before calling ptype callback. Fixes: 88eb1944e18c ("net: core: propagate SKB lists through packet_type lookup") Reviewed-by: Edward Cree Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 2b67f2aa59ddb..fdcff29df9158 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5014,8 +5014,10 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, if (pt_prev->list_func != NULL) pt_prev->list_func(head, pt_prev, orig_dev); else - list_for_each_entry_safe(skb, next, head, list) + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + } } static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc) -- GitLab From dade58ed5af6365ac50ff4259c2a0bf31219e285 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 27 Mar 2019 00:54:51 -0400 Subject: [PATCH 0308/1861] drm/i915/gvt: do not deliver a workload if its creation fails in workload creation routine, if any failure occurs, do not queue this workload for delivery. if this failure is fatal, enter into failsafe mode. 
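The error path uses the usual kernel constructor idiom of encoding an errno inside the returned pointer; a generic sketch of the caller side (names hypothetical, not the actual GVT call site):

	struct workload *w = create_workload(vgpu);	/* hypothetical constructor */

	if (IS_ERR(w))
		return PTR_ERR(w);	/* decode the errno; the constructor already
					 * cleaned up, and nothing was queued */
	queue_workload(w);		/* hypothetical; reached only on success */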
Fixes: 6d76303553ba ("drm/i915/gvt: Move common vGPU workload creation into scheduler.c") Cc: stable@vger.kernel.org #4.19+ Cc: zhenyuw@linux.intel.com Signed-off-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 159192c097cc7..05b953793316b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1486,8 +1486,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_runtime_pm_put_unchecked(dev_priv); } - if (ret && (vgpu_is_vm_unhealthy(ret))) { - enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); + if (ret) { + if (vgpu_is_vm_unhealthy(ret)) + enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); intel_vgpu_destroy_workload(workload); return ERR_PTR(ret); } -- GitLab From 663a50ceac75c2208d2ad95365bc8382fd42f44d Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 27 Mar 2019 00:55:45 -0400 Subject: [PATCH 0309/1861] drm/i915/gvt: do not let pin count of shadow mm go negative shadow mm's pin count got increased in workload preparation phase, which is after workload scanning. it will get decreased in complete_current_workload() anyway after workload completion. Sometimes, if a workload meets a scanning error, its shadow mm pin count will not get increased but will get decreased in the end. This patch lets shadow mm's pin count not go below 0. Fixes: 2707e4446688 ("drm/i915/gvt: vGPU graphics memory virtualization") Cc: zhenyuw@linux.intel.com Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d7052ab7908c8..cf133ef038735 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1946,7 +1946,7 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) */ void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) { - atomic_dec(&mm->pincount); + atomic_dec_if_positive(&mm->pincount); } /** -- GitLab From ff0e2a7bd13f7c332d7f09ff45d08df4bf512ce0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 22 Mar 2019 10:04:44 +0000 Subject: [PATCH 0310/1861] RISC-V: Fix FIXMAP_TOP to avoid overlap with VMALLOC area The FIXMAP area overlaps with VMALLOC area in Linux-5.1-rc1 hence we get below warning in Linux RISC-V 32bit kernel. This warning does not show-up in Linux RISC-V 64bit kernel due to large VMALLOC area. WARNING: CPU: 0 PID: 22 at mm/vmalloc.c:150 vmap_page_range_noflush+0x134/0x15c Modules linked in: CPU: 0 PID: 22 Comm: kworker/0:1 Not tainted 5.1.0-rc1-00005-gebc2f658040e #1 Workqueue: events pcpu_balance_workfn Call Trace: [] walk_stackframe+0x0/0xa0 [] show_stack+0x28/0x32 [] dump_stack+0x62/0x7e [] __warn+0x98/0xce [] warn_slowpath_null+0x2e/0x3c [] vmap_page_range_noflush+0x134/0x15c [] map_kernel_range_noflush+0xc/0x14 [] pcpu_populate_chunk+0x19e/0x236 [] pcpu_balance_workfn+0x448/0x464 [] process_one_work+0x16c/0x2ea [] worker_thread+0xf2/0x3b2 [] kthread+0xce/0xdc [] ret_from_exception+0x0/0xc This patch fixes above warning by placing FIXMAP area below VMALLOC area. 
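A rough sketch of the resulting 32-bit virtual layout (simplified; the exact constants live in the RISC-V pgtable and fixmap headers):

	PAGE_OFFSET	-> start of the kernel linear mapping
	VMALLOC_END	\
			 | vmalloc area
	VMALLOC_START	/  == FIXADDR_TOP after this patch (it was PAGE_OFFSET
			   before, which placed the fixmap inside the vmalloc area)
	FIXADDR_START	-> fixmap, FIXADDR_SIZE bytes, now below the vmalloc area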
Fixes: f2c17aabc917 ("RISC-V: Implement compile-time fixed mappings") Signed-off-by: Anup Patel Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fixmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 57afe604b495b..c207f6634b91c 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -26,7 +26,7 @@ enum fixed_addresses { }; #define FIXADDR_SIZE (__end_of_fixed_addresses * PAGE_SIZE) -#define FIXADDR_TOP (PAGE_OFFSET) +#define FIXADDR_TOP (VMALLOC_START) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXMAP_PAGE_IO PAGE_KERNEL -- GitLab From da4ed37873918eeb4e8db7f0cf55e0a7e18788c3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 15:56:34 -0800 Subject: [PATCH 0311/1861] RISC-V: Use IS_ENABLED(CONFIG_CMODEL_MEDLOW) IS_ENABLED should generally use CONFIG_ prefaced symbols and it doesn't appear as if there is a CMODEL_MEDLOW define. Signed-off-by: Joe Perches Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7dd308129b40f..2872edce894d1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -141,7 +141,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, { s32 hi20; - if (IS_ENABLED(CMODEL_MEDLOW)) { + if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); -- GitLab From 2d4ea4b95cae3133de6b18ec5d5a42ee824fa0ef Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 7 Mar 2019 15:51:45 -0800 Subject: [PATCH 0312/1861] s390/mem_detect: Use IS_ENABLED(CONFIG_BLK_DEV_INITRD) IS_ENABLED should generally use CONFIG_ prefaced symbols and it doesn't appear as if there is a BLK_DEV_INITRD define. Cc: # 4.20 Signed-off-by: Joe Perches Signed-off-by: Martin Schwidefsky --- arch/s390/boot/mem_detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 4cb771ba13fa7..5d316fe404804 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void) { unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && INITRD_START < offset + ENTRIES_EXTENDED_MAX) offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); -- GitLab From 2cc9637ce825f3a9f51f8f78af7474e9e85bfa5f Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Fri, 22 Mar 2019 16:01:17 +0100 Subject: [PATCH 0313/1861] s390/dasd: Fix capacity calculation for large volumes The DASD driver incorrectly limits the maximum number of blocks of ECKD DASD volumes to 32 bit numbers. Volumes with a capacity greater than 2^32-1 blocks are incorrectly recognized as smaller volumes. This results in the following volume capacity limits depending on the formatted block size: BLKSIZE MAX_GB MAX_CYL 512 2047 5843492 1024 4095 8676701 2048 8191 13634816 4096 16383 23860929 The same problem occurs when a volume with more than 17895697 cylinders is accessed in raw-track-access mode. Fix this problem by adding an explicit type cast when calculating the maximum number of blocks. 
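The overflow itself is plain C arithmetic on 32-bit operands; a standalone sketch (a user-space demo, not driver code, using one cylinder more than the 512-byte row's limit from the table above and assuming a 64-bit unsigned long):

	#include <stdio.h>

	int main(void)
	{
		unsigned int cyl = 5843493, trk_per_cyl = 15, blk_per_trk = 49;
		unsigned long blocks;

		/* all three operands are 32-bit, so the product wraps
		 * modulo 2^32 before it is widened for the assignment */
		blocks = cyl * trk_per_cyl * blk_per_trk;
		printf("truncated: %lu\n", blocks);	/* prints 59 */

		/* casting the first operand makes the whole chain 64-bit */
		blocks = (unsigned long) cyl * trk_per_cyl * blk_per_trk;
		printf("correct:   %lu\n", blocks);	/* prints 4294967355 */
		return 0;
	}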
Signed-off-by: Peter Oberparleiter Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6e294b4d3635f..f89f9d02e7884 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2004,14 +2004,14 @@ static int dasd_eckd_end_analysis(struct dasd_block *block) blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block); raw: - block->blocks = (private->real_cyl * + block->blocks = ((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk); dev_info(&device->cdev->dev, - "DASD with %d KB/block, %d KB total size, %d KB/track, " + "DASD with %u KB/block, %lu KB total size, %u KB/track, " "%s\n", (block->bp_block >> 10), - ((private->real_cyl * + (((unsigned long) private->real_cyl * private->rdc_data.trk_per_cyl * blk_per_trk * (block->bp_block >> 9)) >> 1), ((blk_per_trk * block->bp_block) >> 10), -- GitLab From f85b2b297c16b6d9fa8d9f2f26b73b5571dfb859 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 30 Oct 2018 08:19:54 +0100 Subject: [PATCH 0314/1861] s390/qdio: clean up pci_out_supported() pci_out_supported() currently takes a single queue as parameter, even though Output IRQ support is a per-device feature. Adjust the parameter, so that the macro can also be used in code paths with no access to a queue struct. This allows us to remove the remaining open-coded checks for QIB_AC_OUTBOUND_PCI_SUPPORTED. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.h | 3 +-- drivers/s390/cio/qdio_main.c | 17 +++++++++-------- drivers/s390/cio/qdio_setup.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index a6f7c2986b94f..2c29141005ca3 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -341,8 +341,7 @@ static inline int multicast_outbound(struct qdio_q *q) (q->nr == q->irq_ptr->nr_output_qs - 1); } -#define pci_out_supported(q) \ - (q->irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) +#define pci_out_supported(irq) ((irq)->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) #define is_qebsm(q) (q->irq_ptr->sch_token != 0) #define need_siga_in(q) (q->irq_ptr->siga_flag.input) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 9537e656e9278..02d515fa10a17 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -371,7 +371,7 @@ static inline int qdio_siga_input(struct qdio_q *q) static inline void qdio_sync_queues(struct qdio_q *q) { /* PCI capable outbound queues will also be scanned so sync them too */ - if (pci_out_supported(q)) + if (pci_out_supported(q->irq_ptr)) qdio_siga_sync_all(q); else qdio_siga_sync_q(q); @@ -718,7 +718,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) if (need_siga_sync(q)) if (((queue_type(q) != QDIO_IQDIO_QFMT) && - !pci_out_supported(q)) || + !pci_out_supported(q->irq_ptr)) || (queue_type(q) == QDIO_IQDIO_QFMT && multicast_outbound(q))) qdio_siga_sync_q(q); @@ -843,9 +843,9 @@ static void __qdio_outbound_processing(struct qdio_q *q) if (qdio_outbound_q_moved(q)) qdio_kick_handler(q); - if (queue_type(q) == QDIO_ZFCP_QFMT) - if (!pci_out_supported(q) && !qdio_outbound_q_done(q)) - goto sched; + if (queue_type(q) == QDIO_ZFCP_QFMT && !pci_out_supported(q->irq_ptr) && + !qdio_outbound_q_done(q)) + goto sched; if 
(q->u.out.pci_out_enabled) return; @@ -883,13 +883,14 @@ void qdio_outbound_timer(struct timer_list *t) static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) { + struct qdio_irq *irq = q->irq_ptr; struct qdio_q *out; int i; - if (!pci_out_supported(q)) + if (!pci_out_supported(irq)) return; - for_each_output_queue(q->irq_ptr, out, i) + for_each_output_queue(irq, out, i) if (!qdio_outbound_q_done(out)) qdio_tasklet_schedule(out); } @@ -976,7 +977,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) } } - if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) + if (!pci_out_supported(irq_ptr)) return; for_each_output_queue(irq_ptr, q, i) { diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index a59887fad13e6..99d7d2566a3a8 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -523,7 +523,7 @@ void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, irq_ptr->schid.sch_no, is_thinint_irq(irq_ptr), (irq_ptr->sch_token) ? 1 : 0, - (irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) ? 1 : 0, + pci_out_supported(irq_ptr) ? 1 : 0, css_general_characteristics.aif_tdd, (irq_ptr->siga_flag.input) ? "R" : " ", (irq_ptr->siga_flag.output) ? "W" : " ", -- GitLab From 2f2f3839fb8d0fb548fa6ee2d3bec656dc61f1ac Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 30 Oct 2018 08:21:27 +0100 Subject: [PATCH 0315/1861] s390/qdio: clean up qdio_check_outbound_after_thinint() This helper is not thinint-specific, qdio_get_next_buffers() also calls it for non-thinint devices. So give it a more fitting name, and while at it adjust its parameter. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 02d515fa10a17..c25a40ecc1056 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -881,9 +881,8 @@ void qdio_outbound_timer(struct timer_list *t) qdio_tasklet_schedule(q); } -static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) +static inline void qdio_check_outbound_pci_queues(struct qdio_irq *irq) { - struct qdio_irq *irq = q->irq_ptr; struct qdio_q *out; int i; @@ -901,11 +900,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) if (need_siga_sync(q) && need_siga_sync_after_ai(q)) qdio_sync_queues(q); - /* - * The interrupt could be caused by a PCI request. Check the - * PCI capable outbound queues. - */ - qdio_check_outbound_after_thinint(q); + /* The interrupt could be caused by a PCI request: */ + qdio_check_outbound_pci_queues(q->irq_ptr); if (!qdio_inbound_q_moved(q)) return; @@ -1687,8 +1683,7 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, if (need_siga_sync(q)) qdio_sync_queues(q); - /* check the PCI capable outbound queues. */ - qdio_check_outbound_after_thinint(q); + qdio_check_outbound_pci_queues(irq_ptr); if (!qdio_inbound_q_moved(q)) return 0; -- GitLab From 46a984ffb86c8542fa510656fa8cb33befe8ee8f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 28 Mar 2019 11:21:47 +0100 Subject: [PATCH 0316/1861] s390/cpum_cf: Add support for CPU-MF SVN 6 Add support for the CPU-Measurement Facility counter second version number 6. This number is used to detect some more counters in the crypto counter set and the extended counter set. 
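Summarizing the version checks in the diff below, the highest counter number accepted per counter set becomes:

	counter set	csvn 1	csvn 2	csvn 3-5	csvn 6
	crypto		79	79	79		83
	extended	159	175	255		287

so the four new ECC crypto counters (0x50-0x53) and the wider extended set are only exposed on counter second version number 6.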
Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 15 +++- arch/s390/kernel/perf_cpum_cf_events.c | 107 +++++++++++++++++-------- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index e1c54d28713a8..48d48b6187c0d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -2,8 +2,8 @@ /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 2012, 2017 - * Author(s): Hendrik Brueckner + * Copyright IBM Corp. 2012, 2019 + * Author(s): Hendrik Brueckner */ #define KMSG_COMPONENT "cpum_cf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt @@ -26,7 +26,7 @@ static enum cpumf_ctr_set get_counter_set(u64 event) set = CPUMF_CTR_SET_USER; else if (event < 128) set = CPUMF_CTR_SET_CRYPTO; - else if (event < 256) + else if (event < 288) set = CPUMF_CTR_SET_EXT; else if (event >= 448 && event < 496) set = CPUMF_CTR_SET_MT_DIAG; @@ -50,12 +50,19 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) err = -EOPNOTSUPP; break; case CPUMF_CTR_SET_CRYPTO: + if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 && + hwc->config > 79) || + (cpuhw->info.csvn >= 6 && hwc->config > 83)) + err = -EOPNOTSUPP; + break; case CPUMF_CTR_SET_EXT: if (cpuhw->info.csvn < 1) err = -EOPNOTSUPP; if ((cpuhw->info.csvn == 1 && hwc->config > 159) || (cpuhw->info.csvn == 2 && hwc->config > 175) || - (cpuhw->info.csvn > 2 && hwc->config > 255)) + (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5 + && hwc->config > 255) || + (cpuhw->info.csvn >= 6 && hwc->config > 287)) err = -EOPNOTSUPP; break; case CPUMF_CTR_SET_MT_DIAG: diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index b45238c897287..34cc96449b304 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -31,22 +31,26 @@ CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020); CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021); CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004); CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005); -CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_FUNCTIONS, 0x0040); -CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_CYCLES, 0x0041); -CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS, 0x0042); -CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_CYCLES, 0x0043); -CPUMF_EVENT_ATTR(cf_svn_generic, SHA_FUNCTIONS, 0x0044); -CPUMF_EVENT_ATTR(cf_svn_generic, SHA_CYCLES, 0x0045); -CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS, 0x0046); -CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_CYCLES, 0x0047); -CPUMF_EVENT_ATTR(cf_svn_generic, DEA_FUNCTIONS, 0x0048); -CPUMF_EVENT_ATTR(cf_svn_generic, DEA_CYCLES, 0x0049); -CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS, 0x004a); -CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_CYCLES, 0x004b); -CPUMF_EVENT_ATTR(cf_svn_generic, AES_FUNCTIONS, 0x004c); -CPUMF_EVENT_ATTR(cf_svn_generic, AES_CYCLES, 0x004d); -CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS, 0x004e); -CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045); 
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053); CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); @@ -262,23 +266,47 @@ static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_generic_pmu_event_attr[] __initdata = { - CPUMF_EVENT_PTR(cf_svn_generic, PRNG_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, PRNG_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, SHA_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, SHA_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, DEA_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, DEA_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, AES_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, AES_CYCLES), - CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS), - CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_CYCLES), +static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS), + 
CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT), NULL, }; @@ -562,7 +590,18 @@ __init const struct attribute_group **cpumf_cf_event_group(void) default: cfvn = none; } - csvn = cpumcf_svn_generic_pmu_event_attr; + + /* Determine version specific crypto set */ + switch (ci.csvn) { + case 1 ... 5: + csvn = cpumcf_svn_12345_pmu_event_attr; + break; + case 6: + csvn = cpumcf_svn_6_pmu_event_attr; + break; + default: + csvn = none; + } /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); -- GitLab From e8d368ad20f514dce86a64931fe4a6f06a0a6703 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 28 Mar 2019 20:34:25 +0100 Subject: [PATCH 0317/1861] efi/libstub: Refactor the cmd_stubcopy Makefile command It took me a while to understand what is going on in the nested if-blocks. Simplify it by removing unneeded code. - if_changed automatically adds 'set -e', so any failure in the series of commands makes it immediately fail as a whole. So, the outer if block is entirely redundant. - Since commit 9c2af1c7377a ("kbuild: add .DELETE_ON_ERROR special target"), GNU Make automatically deletes the target on any failure in its recipe. The explicit 'rm -f $@' is redundant. - Surrounding commands with ( ) will spawn a subshell to execute them in it, but it is rarely useful to do so. Signed-off-by: Masahiro Yamada Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190328193429.21373-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/Makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b0103e16fc1b9..ae9081988c88a 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -86,12 +86,13 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE # this time, use objcopy and leave all sections in place. 
# quiet_cmd_stubcopy = STUBCPY $@ - cmd_stubcopy = if $(STRIP) --strip-debug $(STUBCOPY_RM-y) -o $@ $<; \ then if $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y); \ then (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \ rm -f $@; /bin/false); \ else $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; fi \ else /bin/false; fi + cmd_stubcopy = \ + $(STRIP) --strip-debug $(STUBCOPY_RM-y) -o $@ $<; \ + if $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y); then \ + echo "$@: absolute symbol references not allowed in the EFI stub" >&2; \ + /bin/false; \ + fi; \ + $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@ # # ARM discards the .data section because it disallows r/w data in the -- GitLab From c2999c281ea2d2ebbdfce96cecc7b52e2ae7c406 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 28 Mar 2019 20:34:26 +0100 Subject: [PATCH 0318/1861] efifb: Omit memory map check on legacy boot Since the following commit: 38ac0287b7f4 ("fbdev/efifb: Honour UEFI memory map attributes when mapping the FB") efifb_probe() checks its memory range via efi_mem_desc_lookup(), and this leads to a spurious error message: EFI_MEMMAP is not enabled at every boot on KVM. This is quite annoying since the error message appears even if you set the "quiet" boot option. Since this happens on legacy boot, which strangely enough exposes an EFI framebuffer via screen_info, let's double check that we are doing an EFI boot before attempting to access the EFI memory map. Reported-by: Takashi Iwai Tested-by: Takashi Iwai Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190328193429.21373-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/video/fbdev/efifb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index ba906876cc454..9e529cc2b4ffd 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -464,7 +464,8 @@ static int efifb_probe(struct platform_device *dev) info->apertures->ranges[0].base = efifb_fix.smem_start; info->apertures->ranges[0].size = size_remap; - if (!efi_mem_desc_lookup(efifb_fix.smem_start, &md)) { + if (efi_enabled(EFI_BOOT) && + !efi_mem_desc_lookup(efifb_fix.smem_start, &md)) { if ((efifb_fix.smem_start + efifb_fix.smem_len) > (md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT))) { pr_err("efifb: video memory @ 0x%lx spans multiple EFI memory regions\n", -- GitLab From 5e83cfe947444c7f201f8c39ce0189922ec9f578 Mon Sep 17 00:00:00 2001 From: Marcin Benka Date: Thu, 28 Mar 2019 20:34:27 +0100 Subject: [PATCH 0319/1861] efi/arm: Show SMBIOS bank/device location in CPER and GHES error logs Run dmi_memdev_walk() for arch arm* as other architectures do. This improves error logging, as the memory device handle is now translated to the DIMM entry's name provided by the DMI handle. Before: {1}[Hardware Error]: DIMM location: not present.
DMI handle: 0x0038 After: {1}[Hardware Error]: DIMM location: N0 DIMM_A0 Signed-off-by: Marcin Benka Signed-off-by: Robert Richter Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190328193429.21373-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 0c1af675c3385..4a0dfe4ab8295 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -167,6 +167,7 @@ static int __init arm_dmi_init(void) * itself, depends on dmi_scan_machine() having been called already. */ dmi_scan_machine(); + dmi_memdev_walk(); if (dmi_available) dmi_set_dump_stack_arch_desc(); return 0; -- GitLab From 0fca08122eaf5c956a2cbe12775245d747f8b1ac Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 28 Mar 2019 20:34:28 +0100 Subject: [PATCH 0320/1861] efi: Unify DMI setup code over the arm/arm64, ia64 and x86 architectures All architectures (arm/arm64, ia64 and x86) do the same here, so unify the code. Note: We do not need to call dump_stack_set_arch_desc() in case of !dmi_available. Both strings, dmi_ids_string and dump_stack_arch_desc_str, are initialized to zero and thus nothing would change. Signed-off-by: Robert Richter Signed-off-by: Ard Biesheuvel Reviewed-by: Jean Delvare Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190328193429.21373-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/ia64/kernel/setup.c | 4 +--- arch/x86/kernel/setup.c | 6 ++---- drivers/firmware/dmi_scan.c | 28 +++++++++++++++------------- drivers/firmware/efi/arm-runtime.c | 7 ++----- include/linux/dmi.h | 8 ++------ 5 files changed, 22 insertions(+), 31 deletions(-) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 583a3746d70be..c9cfa760cd57b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -1058,9 +1058,7 @@ check_bugs (void) static int __init run_dmi_scan(void) { - dmi_scan_machine(); - dmi_memdev_walk(); - dmi_set_dump_stack_arch_desc(); + dmi_setup(); return 0; } core_initcall(run_dmi_scan); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3d872a527cd96..3773905cd2c1d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1005,13 +1005,11 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); - dmi_scan_machine(); - dmi_memdev_walk(); - dmi_set_dump_stack_arch_desc(); + dmi_setup(); /* * VMware detection requires dmi to be available, so this - * needs to be done after dmi_scan_machine(), for the boot CPU. + * needs to be done after dmi_setup(), for the boot CPU.
*/ init_hypervisor_platform(); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 099d83e4e910e..fae2d5c433145 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -416,11 +416,8 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v) nr++; } -void __init dmi_memdev_walk(void) +static void __init dmi_memdev_walk(void) { - if (!dmi_available) - return; - if (dmi_walk_early(count_mem_devices) == 0 && dmi_memdev_nr) { dmi_memdev = dmi_alloc(sizeof(*dmi_memdev) * dmi_memdev_nr); if (dmi_memdev) @@ -614,7 +611,7 @@ static int __init dmi_smbios3_present(const u8 *buf) return 1; } -void __init dmi_scan_machine(void) +static void __init dmi_scan_machine(void) { char __iomem *p, *q; char buf[32]; @@ -769,15 +766,20 @@ static int __init dmi_init(void) subsys_initcall(dmi_init); /** - * dmi_set_dump_stack_arch_desc - set arch description for dump_stack() + * dmi_setup - scan and setup DMI system information * - * Invoke dump_stack_set_arch_desc() with DMI system information so that - * DMI identifiers are printed out on task dumps. Arch boot code should - * call this function after dmi_scan_machine() if it wants to print out DMI - * identifiers on task dumps. + * Scan the DMI system information. This setups DMI identifiers + * (dmi_system_id) for printing it out on task dumps and prepares + * DIMM entry information (dmi_memdev_info) from the SMBIOS table + * for using this when reporting memory errors. */ -void __init dmi_set_dump_stack_arch_desc(void) +void __init dmi_setup(void) { + dmi_scan_machine(); + if (!dmi_available) + return; + + dmi_memdev_walk(); dump_stack_set_arch_desc("%s", dmi_ids_string); } @@ -841,7 +843,7 @@ static bool dmi_is_end_of_table(const struct dmi_system_id *dmi) * returns non zero or we hit the end. Callback function is called for * each successful match. Returns the number of matches. * - * dmi_scan_machine must be called before this function is called. + * dmi_setup must be called before this function is called. */ int dmi_check_system(const struct dmi_system_id *list) { @@ -871,7 +873,7 @@ EXPORT_SYMBOL(dmi_check_system); * Walk the blacklist table until the first match is found. Return the * pointer to the matching entry or NULL if there's no match. * - * dmi_scan_machine must be called before this function is called. + * dmi_setup must be called before this function is called. */ const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list) { diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 4a0dfe4ab8295..e2ac5fa5531b9 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -162,14 +162,11 @@ void efi_virtmap_unload(void) static int __init arm_dmi_init(void) { /* - * On arm64/ARM, DMI depends on UEFI, and dmi_scan_machine() needs to + * On arm64/ARM, DMI depends on UEFI, and dmi_setup() needs to * be called early because dmi_id_init(), which is an arch_initcall * itself, depends on dmi_scan_machine() having been called already. 
*/ - dmi_scan_machine(); - dmi_memdev_walk(); - if (dmi_available) - dmi_set_dump_stack_arch_desc(); + dmi_setup(); return 0; } core_initcall(arm_dmi_init); diff --git a/include/linux/dmi.h b/include/linux/dmi.h index c46fdb36700bc..8de8c4f15163a 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -102,9 +102,7 @@ const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list); extern const char * dmi_get_system_info(int field); extern const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from); -extern void dmi_scan_machine(void); -extern void dmi_memdev_walk(void); -extern void dmi_set_dump_stack_arch_desc(void); +extern void dmi_setup(void); extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); extern int dmi_get_bios_year(void); extern int dmi_name_in_vendors(const char *str); @@ -122,9 +120,7 @@ static inline int dmi_check_system(const struct dmi_system_id *list) { return 0; static inline const char * dmi_get_system_info(int field) { return NULL; } static inline const struct dmi_device * dmi_find_device(int type, const char *name, const struct dmi_device *from) { return NULL; } -static inline void dmi_scan_machine(void) { return; } -static inline void dmi_memdev_walk(void) { } -static inline void dmi_set_dump_stack_arch_desc(void) { } +static inline void dmi_setup(void) { } static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) { if (yearp) -- GitLab From 02562d0ca1084a688ac5c92e0e92947f62f13093 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 28 Mar 2019 20:34:29 +0100 Subject: [PATCH 0321/1861] efi/libstub/arm: Omit unneeded stripping of ksymtab/kcrctab sections Commit f922c4abdf764 ("module: allow symbol exports to be disabled") introduced a way to inhibit generation of kcrctab/ksymtab sections when building ordinary kernel code to be used in a different execution context (decompressor, EFI stub, etc) That means we no longer have to strip those sections explicitly when building the EFI libstub objects, so drop this from the Makefile. 
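For context, a rough sketch of the mechanism that commit relies on (simplified; the exact macro bodies in include/linux/export.h differ between trees): when __DISABLE_EXPORTS is defined for a compilation unit, EXPORT_SYMBOL() expands to nothing, so no ksymtab/kcrctab sections are emitted in the first place and there is nothing left to strip:

  #ifdef __DISABLE_EXPORTS
  #define EXPORT_SYMBOL(sym)                      /* exports disabled: no tables emitted */
  #else
  #define EXPORT_SYMBOL(sym)  ___EXPORT_SYMBOL(sym, "")
  #endif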
Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190328193429.21373-6-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ae9081988c88a..b1f7b64652dbb 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -71,7 +71,6 @@ CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) extra-$(CONFIG_EFI_ARMSTUB) := $(lib-y) lib-$(CONFIG_EFI_ARMSTUB) := $(patsubst %.o,%.stub.o,$(lib-y)) -STUBCOPY_RM-y := -R *ksymtab* -R *kcrctab* STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \ --prefix-symbols=__efistub_ STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS @@ -87,7 +86,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE # quiet_cmd_stubcopy = STUBCPY $@ cmd_stubcopy = \ - $(STRIP) --strip-debug $(STUBCOPY_RM-y) -o $@ $<; \ + $(STRIP) --strip-debug -o $@ $<; \ if $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y); then \ echo "$@: absolute symbol references not allowed in the EFI stub" >&2; \ /bin/false; \
-- GitLab From 6620f45ff8519549a6877663f965c10002918dc2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 27 Mar 2019 16:13:48 +0100 Subject: [PATCH 0322/1861] clk: meson: vid-pll-div: remove warning and return 0 on invalid config The vid_pll_div is a programmable fractional divider, but the vendor provides only a limited set of known configuration values and their corresponding fractions. Thus, at the reset value (0) or an unknown value, we cannot determine the resulting rate. The initial behaviour was to print a warning, but the warning triggers at each boot and whenever the clock tree is refreshed. This patch moves the print to debug level and returns 0 instead of the parent rate. Fixes: 72dbb8c94d0d ("clk: meson: Add vid_pll divider driver") Signed-off-by: Neil Armstrong Reviewed-by: Jerome Brunet Link: https://lkml.kernel.org/r/20190327151348.27402-1-narmstrong@baylibre.com --- drivers/clk/meson/vid-pll-div.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/clk/meson/vid-pll-div.c b/drivers/clk/meson/vid-pll-div.c index 08bcc01c09238..daff235bc7633 100644 --- a/drivers/clk/meson/vid-pll-div.c +++ b/drivers/clk/meson/vid-pll-div.c @@ -82,8 +82,8 @@ static unsigned long meson_vid_pll_div_recalc_rate(struct clk_hw *hw, div = _get_table_val(meson_parm_read(clk->map, &pll_div->val), meson_parm_read(clk->map, &pll_div->sel)); if (!div || !div->divider) { - pr_info("%s: Invalid config value for vid_pll_div\n", __func__); - return parent_rate; + pr_debug("%s: Invalid config value for vid_pll_div\n", __func__); + return 0; } return DIV_ROUND_UP_ULL(parent_rate * div->multiplier, div->divider);
-- GitLab From b49c15e1211cc962cb73bbaaa5175ae068144893 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 19 Mar 2019 12:00:13 +0100 Subject: [PATCH 0323/1861] mac80211: un-schedule TXQs on powersave start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a station enters powersave, its queues should not be returned by ieee80211_next_txq() anymore.
They will be re-scheduled after the station wakes up again. Fixes: 1866760096bf4 ("mac80211: Add TXQ scheduling API") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7f8d93401ce07..bf0b187f994e9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1568,7 +1568,15 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { - if (txq_has_queue(sta->sta.txq[tid])) + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi = to_txq_info(txq); + + spin_lock(&local->active_txq_lock[txq->ac]); + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + spin_unlock(&local->active_txq_lock[txq->ac]); + + if (txq_has_queue(txq)) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids);
-- GitLab From 40586e3fc400c00c11151804dcdc93f8c831c808 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 13 Mar 2019 18:54:27 +0100 Subject: [PATCH 0324/1861] mac80211: fix unaligned access in mesh table hash function The pointer to the last four bytes of the address is not guaranteed to be aligned, so we need to use __get_unaligned_cpu32 here. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 95eb5064fa916..b76a2aefa9ec0 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = {
-- GitLab From 78be2d21cc1cd3069c6138dcfecec62583130171 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 15 Mar 2019 17:38:57 +0200 Subject: [PATCH 0325/1861] mac80211: Increase MAX_MSG_LEN It looks like 100 chars isn't enough for messages: we keep getting warnings popping up from different places due to message truncation. Instead of trying to shorten the prints, just increase the buffer size.
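For reference, the warnings come from the message tracepoint formatting into a fixed-size buffer, roughly like this (a sketch of the trace_msg.h fast-assign, not the verbatim macro):

  WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), MAX_MSG_LEN,
                         vaf->fmt, *vaf->va) >= MAX_MSG_LEN);

Any message of MAX_MSG_LEN - 1 characters or more is truncated and trips the WARN_ON_ONCE, so raising the constant is the least intrusive fix.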
Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/trace_msg.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e2..40141df09f255 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120 DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), -- GitLab From 08a75a887ee46828b54600f4bb7068d872a5edd5 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 15 Mar 2019 17:39:00 +0200 Subject: [PATCH 0326/1861] cfg80211: Handle WMM rules in regulatory domain intersection The support added for regulatory WMM rules did not handle the case of regulatory domain intersections. Fix it. Signed-off-by: Ilan Peer Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/reg.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f1bf91eb2265..0ba778f371cb2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1309,6 +1309,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1, return dfs_region1; } +static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1, + const struct ieee80211_wmm_ac *wmm_ac2, + struct ieee80211_wmm_ac *intersect) +{ + intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min); + intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max); + intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot); + intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn); +} + /* * Helper for regdom_intersect(), this does the real * mathematical intersection fun @@ -1323,6 +1333,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, struct ieee80211_freq_range *freq_range; const struct ieee80211_power_rule *power_rule1, *power_rule2; struct ieee80211_power_rule *power_rule; + const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2; + struct ieee80211_wmm_rule *wmm_rule; u32 freq_diff, max_bandwidth1, max_bandwidth2; freq_range1 = &rule1->freq_range; @@ -1333,6 +1345,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, power_rule2 = &rule2->power_rule; power_rule = &intersected_rule->power_rule; + wmm_rule1 = &rule1->wmm_rule; + wmm_rule2 = &rule2->wmm_rule; + wmm_rule = &intersected_rule->wmm_rule; + freq_range->start_freq_khz = max(freq_range1->start_freq_khz, freq_range2->start_freq_khz); freq_range->end_freq_khz = min(freq_range1->end_freq_khz, @@ -1376,6 +1392,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1, intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms, rule2->dfs_cac_ms); + if (rule1->has_wmm && rule2->has_wmm) { + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + reg_wmm_rules_intersect(&wmm_rule1->client[ac], + &wmm_rule2->client[ac], + &wmm_rule->client[ac]); + reg_wmm_rules_intersect(&wmm_rule1->ap[ac], + &wmm_rule2->ap[ac], + &wmm_rule->ap[ac]); + } + + intersected_rule->has_wmm = true; + } else if (rule1->has_wmm) { + *wmm_rule = 
*wmm_rule1; + intersected_rule->has_wmm = true; + } else if (rule2->has_wmm) { + *wmm_rule = *wmm_rule2; + intersected_rule->has_wmm = true; + } else { + intersected_rule->has_wmm = false; + } + if (!is_valid_reg_rule(intersected_rule)) return -EINVAL; -- GitLab From eb9b64e3a9f8483e6e54f4e03b2ae14ae5db2690 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 16 Mar 2019 18:06:31 +0100 Subject: [PATCH 0327/1861] mac80211: fix memory accounting with A-MSDU aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skb->truesize can change due to memory reallocation or when adding extra fragments. Adjust fq->memory_usage accordingly Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8a49a74c0a374..5f546de10d960 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3221,6 +3221,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3261,6 +3262,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head || skb_is_gso(head)) goto out; + orig_truesize = head->truesize; orig_len = head->len; if (skb->len + head->len > max_amsdu_len) @@ -3318,6 +3320,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb; out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; -- GitLab From 344c9719c508bb3ef4e9c134066c83ff00ab6206 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 16:57:35 -0700 Subject: [PATCH 0328/1861] cfg80211: Change an 'else if' into an 'else' in cfg80211_calculate_bitrate_he When building with -Wsometimes-uninitialized, Clang warns: net/wireless/util.c:1223:11: warning: variable 'result' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] Clang can't evaluate at this point that WARN(1, ...) always returns true because __ret_warn_on is defined as !!(condition), which isn't immediately evaluated as 1. Change this branch to else so that it's clear to Clang that we intend to bail out here. 
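A stripped-down reproduction of the pattern (hypothetical code, not the kernel source):

  int result;

  if (cond_a)
          result = rates_26[idx];
  else if (WARN(1, "invalid"))    /* evaluates to !!(1), not a literal 1 */
          return 0;

  return result;    /* Clang: 'result' may be used uninitialized */

Because WARN()'s return value is computed from !!(condition), Clang's flow analysis keeps a path on which neither branch assigns 'result', whereas a plain else unconditionally bails out.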
Link: https://github.com/ClangBuiltLinux/linux/issues/382 Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/wireless/util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index e4b8db5e81ec7..75899b62bdc9e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1220,9 +1220,11 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) else if (rate->bw == RATE_INFO_BW_HE_RU && rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26) result = rates_26[rate->he_gi]; - else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", - rate->bw, rate->he_ru_alloc)) + else { + WARN(1, "invalid HE MCS: bw:%d, ru:%d\n", + rate->bw, rate->he_ru_alloc); return 0; + } /* now scale to the appropriate MCS */ tmp = result; -- GitLab From 4856bfd230985e43e84c26473c91028ff0a533bd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 1 Mar 2019 14:48:37 +0100 Subject: [PATCH 0329/1861] mac80211: do not call driver wake_tx_queue op during reconfig There are several scenarios in which mac80211 can call drv_wake_tx_queue after ieee80211_restart_hw has been called and has not yet completed. Driver private structs are considered uninitialized until mac80211 has uploaded the vifs, stations and keys again, so using private tx queue data during that time is not safe. The driver can also not rely on drv_reconfig_complete to figure out when it is safe to accept drv_wake_tx_queue calls again, because it is only called after all tx queues are woken again. To fix this, bail out early in drv_wake_tx_queue if local->in_reconfig is set. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 28d022a3eee30..ae4f0be3b393b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1195,6 +1195,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; -- GitLab From 90abf96abd9bb00f36c8d3640255e6bfa73f7495 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 25 Feb 2019 12:38:49 +0000 Subject: [PATCH 0330/1861] cfg80211: Use kmemdup in cfg80211_gen_new_ie() Use kmemdup rather than duplicating its implementation Signed-off-by: YueHaibing Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 287518c6caa40..04d888628f29d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -190,10 +190,9 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, /* copy subelement as we need to change its content to * mark an ie after it is processed. */ - sub_copy = kmalloc(subie_len, gfp); + sub_copy = kmemdup(subelement, subie_len, gfp); if (!sub_copy) return 0; - memcpy(sub_copy, subelement, subie_len); pos = &new_ie[0]; -- GitLab From d6db02a88a4aaa1cd7105137c67ddec7f3bdbc05 Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Mon, 25 Feb 2019 15:37:20 +0530 Subject: [PATCH 0331/1861] nl80211: Add NL80211_FLAG_CLEAR_SKB flag for other NL commands This commit adds NL80211_FLAG_CLEAR_SKB flag to other NL commands that carry key data to ensure they do not stick around on heap after the SKB is freed. 
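A rough sketch of the flag's effect in nl80211's post-doit handling (simplified, not the verbatim code): the payload is wiped before the buffer is freed, so key material cannot be recovered from the heap afterwards:

  if (ops->internal_flags & NL80211_FLAG_CLEAR_SKB)
          memset(skb->data, 0, skb->len);    /* wipe key material before free */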
Also introduced this flag for NL80211_CMD_VENDOR as there are sub commands which configure the keys. Signed-off-by: Sunil Dutt Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25a9e3b5c1542..47e30a58566c2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13650,7 +13650,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -13701,7 +13702,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -13709,7 +13711,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -13738,7 +13741,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -14090,7 +14094,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -14145,7 +14150,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, -- GitLab From 5b989c18dab2e82bac8a5564a174794bf84b20e6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Mar 2019 11:03:35 +0100 Subject: [PATCH 0332/1861] mac80211: rework locking for txq scheduling / airtime fairness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Holding the lock around the entire duration of tx scheduling can create some nasty lock contention, especially when processing airtime information from the tx status or the rx path. Improve locking by only holding the active_txq_lock for lookups / scheduling list modifications. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 49 ++++++++++++++++-------------------------- net/mac80211/tx.c | 44 ++++++++++++++----------------------- 2 files changed, 35 insertions(+), 58 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ac2ed8ec662bd..616998252dc7e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6231,8 +6231,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to return packets from. * - * Should only be called between calls to ieee80211_txq_schedule_start() - * and ieee80211_txq_schedule_end(). * Returns the next txq if successful, %NULL if no queue is eligible. 
If a txq * is returned, it should be returned with ieee80211_return_txq() after the * driver has finished scheduling it. @@ -6240,51 +6238,42 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac); /** - * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() - * - * @hw: pointer as obtained from ieee80211_alloc_hw() - * @txq: pointer obtained from station or virtual interface - * - * Should only be called between calls to ieee80211_txq_schedule_start() - * and ieee80211_txq_schedule_end(). - */ -void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); - -/** - * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC + * ieee80211_txq_schedule_start - start new scheduling round for TXQs * * @hw: pointer as obtained from ieee80211_alloc_hw() * @ac: AC number to acquire locks for * - * Acquire locks needed to schedule TXQs from the given AC. Should be called - * before ieee80211_next_txq() or ieee80211_return_txq(). + * Should be called before ieee80211_next_txq() or ieee80211_return_txq(). + * The driver must not call multiple TXQ scheduling rounds concurrently. */ -void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock); +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac); + +/* (deprecated) */ +static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) +{ +} /** - * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC + * ieee80211_schedule_txq - schedule a TXQ for transmission * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @ac: AC number to acquire locks for + * @txq: pointer obtained from station or virtual interface * - * Release locks previously acquired by ieee80211_txq_schedule_end(). + * Schedules a TXQ for transmission if it is not already scheduled. */ -void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock); +void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); /** - * ieee80211_schedule_txq - schedule a TXQ for transmission + * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface - * - * Schedules a TXQ for transmission if it is not already scheduled. 
Takes a - * lock, which means it must *not* be called between - * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end() */ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock); +static inline void +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + ieee80211_schedule_txq(hw, txq); +} /** * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5f546de10d960..134a3da147c6d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3649,16 +3649,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue); struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_txq *ret = NULL; struct txq_info *txqi = NULL; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); begin: txqi = list_first_entry_or_null(&local->active_txqs[ac], struct txq_info, schedule_order); if (!txqi) - return NULL; + goto out; if (txqi->txq.sta) { struct sta_info *sta = container_of(txqi->txq.sta, @@ -3675,21 +3676,25 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) if (txqi->schedule_round == local->schedule_round[ac]) - return NULL; + goto out; list_del_init(&txqi->schedule_order); txqi->schedule_round = local->schedule_round[ac]; - return &txqi->txq; + ret = &txqi->txq; + +out: + spin_unlock_bh(&local->active_txq_lock[ac]); + return ret; } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_return_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); - lockdep_assert_held(&local->active_txq_lock[txq->ac]); + spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { @@ -3709,17 +3714,7 @@ void ieee80211_return_txq(struct ieee80211_hw *hw, list_add_tail(&txqi->schedule_order, &local->active_txqs[txq->ac]); } -} -EXPORT_SYMBOL(ieee80211_return_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) - __acquires(txq_lock) __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - - spin_lock_bh(&local->active_txq_lock[txq->ac]); - ieee80211_return_txq(hw, txq); spin_unlock_bh(&local->active_txq_lock[txq->ac]); } EXPORT_SYMBOL(ieee80211_schedule_txq); @@ -3732,7 +3727,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct sta_info *sta; u8 ac = txq->ac; - lockdep_assert_held(&local->active_txq_lock[ac]); + spin_lock_bh(&local->active_txq_lock[ac]); if (!txqi->txq.sta) goto out; @@ -3762,34 +3757,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, sta->airtime[ac].deficit += sta->airtime_weight; list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + spin_unlock_bh(&local->active_txq_lock[ac]); return false; out: if (!list_empty(&txqi->schedule_order)) list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[ac]); return true; } EXPORT_SYMBOL(ieee80211_txq_may_transmit); void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) - __acquires(txq_lock) { struct ieee80211_local *local = hw_to_local(hw); spin_lock_bh(&local->active_txq_lock[ac]); local->schedule_round[ac]++; -} -EXPORT_SYMBOL(ieee80211_txq_schedule_start); - -void 
ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) - __releases(txq_lock) -{ - struct ieee80211_local *local = hw_to_local(hw); - spin_unlock_bh(&local->active_txq_lock[ac]); } -EXPORT_SYMBOL(ieee80211_txq_schedule_end); +EXPORT_SYMBOL(ieee80211_txq_schedule_start); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, -- GitLab From 436b0a583af0497b4b84cb73c12f8bd620f868af Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 26 Mar 2019 16:29:30 -0500 Subject: [PATCH 0333/1861] EDAC/altera: Do less intrusive error injection Improve the Arria10 and Stratix10 error injection routine by reading the data and changing just 1 bit before writing back out. Previous routine would overwrite the first bytes to 0 then change 1 bit but this method is less intrusive. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/1553635771-32693-1-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 30 +++++++++++++----------------- drivers/edac/altera_edac.h | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5ff263850cc71..acb3006df6315 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1762,28 +1762,24 @@ static ssize_t altr_edac_a10_device_trig2(struct file *file, if (trig_type == ALTR_UE_TRIGGER_CHAR) { writel(priv->ue_set_mask, set_addr); } else { - /* Setup write of 0 to first 4 bytes */ - writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); - /* Setup write of 4 bytes */ + /* Setup read/write of 4 bytes */ writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); /* Setup Address to 0 */ - writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST); - /* Setup accctrl to write & data override */ - writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); - /* Kick it. */ - writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); - /* Setup accctrl to read & ecc override */ - writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + writel(0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Setup accctrl to read & ecc & data override */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); /* Kick it. 
*/ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); /* Setup write for single bit change */ - writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA0_OFST) ^ 0x1, + drvdata->base + ECC_BLK_WDATA0_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA1_OFST), + drvdata->base + ECC_BLK_WDATA1_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA2_OFST), + drvdata->base + ECC_BLK_WDATA2_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA3_OFST), + drvdata->base + ECC_BLK_WDATA3_OFST); + /* Copy Read ECC to Write ECC */ writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), drvdata->base + ECC_BLK_WECC0_OFST); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 60513f201ffb4..1532ec9c3510f 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -321,7 +321,7 @@ struct altr_sdram_mc_data { #define ECC_BLK_STARTACC_OFST 0x7C #define ECC_XACT_KICK 0x10000 -#define ECC_WORD_WRITE 0xF +#define ECC_WORD_WRITE 0xFF #define ECC_WRITE_DOVR 0x101 #define ECC_WRITE_EDOVR 0x103 #define ECC_READ_EOVR 0x2 -- GitLab From 788586efd116d6d7d05985881eda503333e702b4 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 26 Mar 2019 16:29:31 -0500 Subject: [PATCH 0334/1861] EDAC/altera: Initialize peripheral FIFOs in probe() The FIFO memory and ECC initialization doesn't need to be done as a separate operation early in the startup. Improve the Arria10 and Stratix10 peripheral FIFO init by initializing memory and enabling ECC as part of the device driver initialization. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/1553635771-32693-2-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 162 +++++++++++++++++++++---------------- 1 file changed, 91 insertions(+), 71 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index acb3006df6315..761199175c763 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1361,8 +1361,19 @@ static const struct edac_device_prv_data a10_l2ecc_data = { #ifdef CONFIG_EDAC_ALTERA_ETHERNET +static int __init socfpga_init_ethernet_ecc(struct altr_edac_device_dev *dev) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(dev); +} + static const struct edac_device_prv_data a10_enetecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_ethernet_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1374,21 +1385,25 @@ static const struct edac_device_prv_data a10_enetecc_data = { .inject_fops = &altr_edac_a10_device_inject2_fops, }; -static int __init socfpga_init_ethernet_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); -} - -early_initcall(socfpga_init_ethernet_ecc); - #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ /********************** NAND Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_NAND +static int __init socfpga_init_nand_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data 
a10_nandecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_nand_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1400,21 +1415,25 @@ static const struct edac_device_prv_data a10_nandecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_nand_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); -} - -early_initcall(socfpga_init_nand_ecc); - #endif /* CONFIG_EDAC_ALTERA_NAND */ /********************** DMA Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_DMA +static int __init socfpga_init_dma_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_dmaecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_dma_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1426,21 +1445,25 @@ static const struct edac_device_prv_data a10_dmaecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_dma_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); -} - -early_initcall(socfpga_init_dma_ecc); - #endif /* CONFIG_EDAC_ALTERA_DMA */ /********************** USB Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_USB +static int __init socfpga_init_usb_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_usbecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_usb_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1452,21 +1475,25 @@ static const struct edac_device_prv_data a10_usbecc_data = { .inject_fops = &altr_edac_a10_device_inject2_fops, }; -static int __init socfpga_init_usb_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); -} - -early_initcall(socfpga_init_usb_ecc); - #endif /* CONFIG_EDAC_ALTERA_USB */ /********************** QSPI Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_QSPI +static int __init socfpga_init_qspi_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_qspiecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_qspi_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1478,13 +1505,6 @@ static const struct edac_device_prv_data a10_qspiecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_qspi_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); -} - -early_initcall(socfpga_init_qspi_ecc); - #endif /* CONFIG_EDAC_ALTERA_QSPI */ /********************* SDMMC Device Functions **********************/ @@ -1593,6 +1613,35 @@ err_release_group_1: return rc; } +static int __init socfpga_init_sdmmc_ecc(struct altr_edac_device_dev *device) +{ + int rc = -ENODEV; + struct device_node *child; 
+ + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); + if (!child) + return -ENODEV; + + if (!of_device_is_available(child)) + goto exit; + + if (validate_parent_available(child)) + goto exit; + + /* Init portB */ + rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, + a10_sdmmceccb_data.ecc_enable_mask, 1); + if (rc) + goto exit; + + /* Setup portB */ + return altr_portb_setup(device); + +exit: + of_node_put(child); + return rc; +} + static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) { struct altr_edac_device_dev *ad = dev_id; @@ -1617,7 +1666,7 @@ static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) } static const struct edac_device_prv_data a10_sdmmcecca_data = { - .setup = altr_portb_setup, + .setup = socfpga_init_sdmmc_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1630,7 +1679,7 @@ static const struct edac_device_prv_data a10_sdmmcecca_data = { }; static const struct edac_device_prv_data a10_sdmmceccb_data = { - .setup = altr_portb_setup, + .setup = socfpga_init_sdmmc_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENB, .ue_clear_mask = ALTR_A10_ECC_DERRPENB, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1642,35 +1691,6 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_sdmmc_ecc(void) -{ - int rc = -ENODEV; - struct device_node *child; - - if (!socfpga_is_a10() && !socfpga_is_s10()) - return -ENODEV; - - child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); - if (!child) { - edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); - return -ENODEV; - } - - if (!of_device_is_available(child)) - goto exit; - - if (validate_parent_available(child)) - goto exit; - - rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, - a10_sdmmcecca_data.ecc_enable_mask, 1); -exit: - of_node_put(child); - return rc; -} - -early_initcall(socfpga_init_sdmmc_ecc); - #endif /* CONFIG_EDAC_ALTERA_SDMMC */ /********************* Arria10 EDAC Device Functions *************************/
-- GitLab From fe61692886669bbcc260f980903eacb4ddebaf59 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Mar 2019 15:48:45 -0300 Subject: [PATCH 0335/1861] drm/atomic-helper: Make atomic_enable/disable crtc callbacks optional Make the atomic_enable and atomic_disable operations in the drm_crtc_helper_funcs struct optional. With this, the target display drivers don't need to define a dummy function if they don't need one.
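For illustration, the kind of stub this makes unnecessary (hypothetical driver code, not part of this patch):

  static void foo_crtc_atomic_disable(struct drm_crtc *crtc,
                                      struct drm_crtc_state *old_state)
  {
          /* nothing to do */
  }

With the helpers treating the callback as optional, both the stub and its .atomic_disable = foo_crtc_atomic_disable assignment can simply be dropped.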
Changes since v2: * Don't make funcs optional * Update kerneldoc for atomic_enable/disable * Replace "if (funcs->atomic_enable)" by "if (funcs->commit)" * Improve commit message Signed-off-by: Rodrigo Siqueira Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20190314184845.gjmvkamobj4dilyp@smtp.gmail.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/drm_atomic_helper.c | 5 ++--- include/drm/drm_modeset_helper_vtables.h | 4 ++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 40ac198480345..fbb76332cc9f1 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1034,7 +1034,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) funcs->atomic_disable(crtc, old_crtc_state); else if (funcs->disable) funcs->disable(crtc); - else + else if (funcs->dpms) funcs->dpms(crtc, DRM_MODE_DPMS_OFF); if (!(dev->irq_enabled && dev->num_crtcs)) @@ -1277,10 +1277,9 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, if (new_crtc_state->enable) { DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n", crtc->base.id, crtc->name); - if (funcs->atomic_enable) funcs->atomic_enable(crtc, old_crtc_state); - else + else if (funcs->commit) funcs->commit(crtc); } } diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index cfb7be40bed7a..ce4de6b1e444a 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -418,6 +418,8 @@ struct drm_crtc_helper_funcs { * Drivers can use the @old_crtc_state input parameter if the operations * needed to enable the CRTC don't depend solely on the new state but * also on the transition between the old state and the new state. + * + * This function is optional. */ void (*atomic_enable)(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state); @@ -441,6 +443,8 @@ struct drm_crtc_helper_funcs { * parameter @old_crtc_state which could be used to access the old * state. Atomic drivers should consider to use this one instead * of @disable. + * + * This function is optional. */ void (*atomic_disable)(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state); -- GitLab From aba0954327c831f593702e3a81ef3ad4bec7a838 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 29 Mar 2019 11:28:52 +0100 Subject: [PATCH 0336/1861] tick/broadcast: Fix warning about undefined tick_broadcast_oneshot_offline() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Randconfig builds with CONFIG_TICK_ONESHOT=y CONFIG_HOTPLUG_CPU=n trigger kernel/time/tick-broadcast.c:39:13: warning: ‘tick_broadcast_oneshot_offline’ \ declared ‘static’ but never defined [-Wunused-function] due to that function's definition missing. Move the CONFIG_HOTPLUG_CPU ifdeffery around its declaration too. 
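The offending shape, reduced to its essence (illustrative sketch):

  #ifdef CONFIG_TICK_ONESHOT
  static void tick_broadcast_oneshot_offline(unsigned int cpu);    /* declaration */
  #endif

  #ifdef CONFIG_HOTPLUG_CPU
  /* ... the definition lives behind this separate guard ... */
  #endif

With TICK_ONESHOT=y and HOTPLUG_CPU=n, the declaration survives while the definition is compiled out, which is exactly what the warning complains about.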
Fixes: 1b72d4323798 ("tick: Remove outgoing CPU from broadcast masks") Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Valentin Schneider Cc: Frederic Weisbecker Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190329110508.6621-1-bp@alien8.de --- kernel/time/tick-broadcast.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 0283523de045c..7541cbca695eb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -36,12 +36,16 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); +# ifdef CONFIG_HOTPLUG_CPU static void tick_broadcast_oneshot_offline(unsigned int cpu); +# endif #else static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } +# ifdef CONFIG_HOTPLUG_CPU static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { } +# endif #endif /* -- GitLab From a72a19327b92e09dab0eb9fd2bc83466465cbffb Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Mar 2019 00:09:39 +0100 Subject: [PATCH 0337/1861] x86/mm/tlb: Define LOADED_MM_SWITCHING with pointer-sized number sparse complains that LOADED_MM_SWITCHING's definition casts an int to a pointer: arch/x86/mm/tlb.c:409:17: warning: non size-preserving integer to pointer cast Use a pointer-sized integer constant instead. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Mukesh Ojha Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sai Praneeth Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190328230939.15711-1-jannh@google.com --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f4204bf377fcf..90926e8dd1f8c 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -167,7 +167,7 @@ struct tlb_state { */ struct mm_struct *loaded_mm; -#define LOADED_MM_SWITCHING ((struct mm_struct *)1) +#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL) /* Last user mm for optimizing IBPB */ union { -- GitLab From 872e192fab643887f143106eb56443d87e5e87c1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 Mar 2019 18:11:03 +0000 Subject: [PATCH 0338/1861] scsi: qedi: remove declaration of nvm_image from stack The nvm_image is a large struct qedi_nvm_iscsi_image object of over 24K so don't declare it on the stack just for a sizeof requirement; use sizeof on struct qedi_nvm_iscsi_image instead. Fixes: c77a2fa3ff8f ("scsi: qedi: Add the CRC size within iSCSI NVM image") Signed-off-by: Colin Ian King Acked-by: Manish Rangankar Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedi/qedi_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index e74a62448ba46..e5db9a9954dc0 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1392,10 +1392,8 @@ static void qedi_free_nvm_iscsi_cfg(struct qedi_ctx *qedi) static int qedi_alloc_nvm_iscsi_cfg(struct qedi_ctx *qedi) { - struct qedi_nvm_iscsi_image nvm_image; - qedi->iscsi_image = dma_alloc_coherent(&qedi->pdev->dev, - sizeof(nvm_image), + sizeof(struct qedi_nvm_iscsi_image), &qedi->nvm_buf_dma, GFP_KERNEL); if (!qedi->iscsi_image) { QEDI_ERR(&qedi->dbg_ctx, "Could not allocate NVM BUF.\n"); @@ -2236,14 +2234,13 @@ static void qedi_boot_release(void *data) static int qedi_get_boot_info(struct qedi_ctx *qedi) { int ret = 1; - struct qedi_nvm_iscsi_image nvm_image; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Get NVM iSCSI CFG image\n"); ret = qedi_ops->common->nvm_get_image(qedi->cdev, QED_NVM_IMAGE_ISCSI_CFG, (char *)qedi->iscsi_image, - sizeof(nvm_image)); + sizeof(struct qedi_nvm_iscsi_image)); if (ret) QEDI_ERR(&qedi->dbg_ctx, "Could not get NVM image. ret = %d\n", ret); -- GitLab From 39f0584ee695f1d75f4181b209602b9c49e73821 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 12 Mar 2019 19:39:23 +0100 Subject: [PATCH 0339/1861] x86/mce: Remove mce_report_event() Calling this function has been wrong for a while now: * Can't call schedule_work() in #MC context. * mce_notify_irq() either. * None of that noodling is needed anymore - all it needs to do is kick the IRQ work which would self-IPI so that once the #MC handler is done, the work queue will run and process queued MCE records. So remove it. Reported-by: Peter Zijlstra Signed-off-by: Borislav Petkov Cc: Tony Luck Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190325172121.7926-1-bp@alien8.de --- arch/x86/kernel/cpu/mce/core.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index c3498732ba287..58925e7ccb276 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -460,23 +460,6 @@ static void mce_irq_work_cb(struct irq_work *entry) mce_schedule_work(); } -static void mce_report_event(struct pt_regs *regs) -{ - if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { - mce_notify_irq(); - /* - * Triggering the work queue here is just an insurance - * policy in case the syscall exit notify handler - * doesn't run soon enough or ends up running on the - * wrong CPU (can happen when audit sleeps) - */ - mce_schedule_work(); - return; - } - - irq_work_queue(&mce_irq_work); -} - /* * Check if the address reported by the CPU is in a format we can parse. * It would be possible to add code for most other cases, but all would @@ -1331,7 +1314,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst > 0) - mce_report_event(regs); + irq_work_queue(&mce_irq_work); + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); sync_core(); -- GitLab From a165dcc923ada2ffdee1d4f41f12f81b66d04c55 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Mar 2019 17:57:30 +0800 Subject: [PATCH 0340/1861] hwmon: (w83773g) Select REGMAP_I2C to fix build error Select REGMAP_I2C to avoid below build error: ERROR: "__devm_regmap_init_i2c" [drivers/hwmon/w83773g.ko] undefined! 
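The undefined symbol comes from the driver's probe path, which initializes its register map through the I2C regmap core along these lines (simplified sketch; the regmap_config identifier is assumed):

  regmap = devm_regmap_init_i2c(client, &w83773_regmap_config);
  if (IS_ERR(regmap))
          return PTR_ERR(regmap);

devm_regmap_init_i2c() is only compiled when REGMAP_I2C is set, hence the select.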
Fixes: ee249f271524 ("hwmon: Add W83773G driver") Cc: stable@vger.kernel.org Signed-off-by: Axel Lin Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 6f929bfa9fcd3..d0f1dfe2bcbbd 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1759,6 +1759,7 @@ config SENSORS_VT8231 config SENSORS_W83773G tristate "Nuvoton W83773G" depends on I2C + select REGMAP_I2C help If you say yes here you get support for the Nuvoton W83773G hardware monitoring chip. -- GitLab From 8e6af454117a51dbf6c8a47c00180a0c235052fe Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Mar 2019 16:01:58 -0500 Subject: [PATCH 0341/1861] hwmon: (occ) Fix power sensor indexing In the case of power sensor version 0xA0, the sensor indexing overlapped with the "caps" power sensors, resulting in probe failure and kernel warnings. Fix this by specifying the next index for each power sensor version. Fixes: 54076cb3b5ff ("hwmon (occ): Add sensor attributes and register ...") Cc: stable@vger.kernel.org Signed-off-by: Eddie James Tested-by: Joel Stanley Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index b91a80abf724d..4679acb4918e7 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -890,6 +890,8 @@ static int occ_setup_sensor_attrs(struct occ *occ) s++; } } + + s = (sensors->power.num_sensors * 4) + 1; } else { for (i = 0; i < sensors->power.num_sensors; ++i) { s = i + 1; @@ -918,11 +920,11 @@ static int occ_setup_sensor_attrs(struct occ *occ) show_power, NULL, 3, i); attr++; } - } - if (sensors->caps.num_sensors >= 1) { s = sensors->power.num_sensors + 1; + } + if (sensors->caps.num_sensors >= 1) { snprintf(attr->name, sizeof(attr->name), "power%d_label", s); attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, 0, 0); -- GitLab From 5fd43ddbec7623441239d247155a30b69e51bea1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 20 Mar 2019 10:32:58 -0700 Subject: [PATCH 0342/1861] hwmon: (ntc_thermistor) Fix temperature type reporting Commit 7cc7de93fad4 ("hwmon: (ntc_thermistor) Convert to new hwmon API") converted the driver to use the new hwmon API, but introduced a subtle error: The temperature type is no longer reported as temp1_type, but as temp2_type. Fixes: 7cc7de93fad4 ("hwmon: (ntc_thermistor) Convert to new hwmon API") Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index e4f9f7ce92fab..f9abeeeead9e9 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -640,7 +640,7 @@ static const struct hwmon_channel_info ntc_chip = { }; static const u32 ntc_temp_config[] = { - HWMON_T_INPUT, HWMON_T_TYPE, + HWMON_T_INPUT | HWMON_T_TYPE, 0 }; -- GitLab From d3b018f757560ab5c2bce0e7f46e0c1510d7afd4 Mon Sep 17 00:00:00 2001 From: Carlos Menin Date: Wed, 13 Mar 2019 11:11:26 -0300 Subject: [PATCH 0343/1861] dt-bindings: hwmon: (adc128d818) Specify ti,mode property size By default, cells in DT are 32-bit in size. The driver reads "ti,mode" using the function of_property_read_u8() which causes the value to be read incorrectly in little-endian architectures if the size is not specified. 
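To illustrate with this binding's property: a default cell stores the value 2 as the four big-endian bytes 00 00 00 02, so a single-byte of_property_read_u8() does not see the intended 2, while the /bits/ 8 form stores exactly one byte:

  ti,mode = <2>;            /* 32-bit cell: the u8 read picks up 0x00 */
  ti,mode = /bits/ 8 <2>;   /* 8-bit value: the u8 read picks up 0x02 */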
Make it explicit in the binding documentation that this property must be set as an 8-bit value. Signed-off-by: Carlos Menin Reviewed-by: Rob Herring Signed-off-by: Guenter Roeck --- Documentation/devicetree/bindings/hwmon/adc128d818.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Documentation/devicetree/bindings/hwmon/adc128d818.txt b/Documentation/devicetree/bindings/hwmon/adc128d818.txt index 08bab0e94d25a..d0ae46d7bac37 100644 --- a/Documentation/devicetree/bindings/hwmon/adc128d818.txt +++ b/Documentation/devicetree/bindings/hwmon/adc128d818.txt @@ -26,7 +26,7 @@ Required node properties: Optional node properties: - - ti,mode: Operation mode (see above). + - ti,mode: Operation mode (u8) (see above). Example (operation mode 2): @@ -34,5 +34,5 @@ adc128d818@1d { compatible = "ti,adc128d818"; reg = <0x1d>; - ti,mode = <2>; + ti,mode = /bits/ 8 <2>; };
-- GitLab From ae37a8cd9b0ad3416d71e54cfaeb3744178189a8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Mar 2019 15:54:51 +0100 Subject: [PATCH 0344/1861] x86/cpufeature: Remove __pure attribute from _static_cpu_has() __pure is used to make gcc do Common Subexpression Elimination (CSE) and thus save subsequent invocations of a function which does a complex computation (without side effects). As a simple example: bool a = _static_cpu_has(x); bool b = _static_cpu_has(x); gets turned into bool a = _static_cpu_has(x); bool b = a; However, gcc doesn't do CSE with asm()s when those get inlined - as is done with _static_cpu_has() - because, for example, the t_yes/t_no labels are different for each inlined function body and thus cannot be detected as equivalent anymore for the CSE heuristic to hit. However, this is all beside the point, because it is best to avoid more than one call to _static_cpu_has(X) in the same function anyway: each such call is an alternatives patch site, so duplicating it is simply pointless. Therefore, drop the __pure attribute as it is not doing anything. Reported-by: Nadav Amit Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190307151036.GD26566@zn.tnic --- arch/x86/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce95b8cbd2296..2fb791a1b479b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -159,7 +159,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); * These will statically patch the target code for additional * performance. */ -static __always_inline __pure bool _static_cpu_has(u16 bit) +static __always_inline bool _static_cpu_has(u16 bit) { asm_volatile_goto("1: jmp 6f\n" "2:\n"
-- GitLab From 676e4a6fe703f2dae699ee9d56f14516f9ada4ea Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 29 Mar 2019 10:18:00 +0100 Subject: [PATCH 0345/1861] xdp: fix cpumap redirect SKB creation bug We want to avoid leaking pointer info from xdp_frame (which is placed at the top of the frame), as in commit 6dfb970d3dbd ("xdp: avoid leaking info stored in frame data on page reuse") and the followup commit 97e19cce05e5 ("bpf: reserve xdp_frame size in xdp headroom") that reserves this headroom. These changes also affected how cpumap constructed SKBs: as the xdpf->headroom size changed, the skb data starting point was in effect shifted by 32 bytes (sizeof xdp_frame).
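A worked example of the shift, with illustrative numbers that are not from the commit:

  /* true hard-start headroom: 256 bytes */
  xdpf->headroom = 256 - sizeof(struct xdp_frame);   /* = 224 */
  pkt_data_start = xdpf->data - xdpf->headroom;      /* 32 bytes past the real start */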
This was still okay, as the cpumap frame_size calculation also included xdpf->headroom, which was reduced by the same amount. A bug was introduced in commit 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't"), where the xdpf->headroom became part of the SKB_DATA_ALIGN rounding up. This round-up to find the frame_size is in principle still correct as it does not exceed the 2048 bytes frame_size (which is max for ixgbe and i40e), but the 32 bytes offset of pkt_data_start puts this over the 2048 bytes limit. This causes skb_shared_info to spill into the next frame. It is a little hard to trigger, as the SKB needs to use more than 15 skb_shinfo->frags[], as far as I calculate. This does happen in practice for TCP streams when skb_try_coalesce() kicks in. KASAN can be used to detect these wrong memory accesses, I've seen: BUG: KASAN: use-after-free in skb_try_coalesce+0x3cb/0x760 BUG: KASAN: wild-memory-access in skb_release_data+0xe2/0x250 The veth driver also constructs an SKB from xdp_frame in this way, but is not affected, as it doesn't reserve/deduct the room (used by xdp_frame) from the SKB headroom. Instead it clears the pointers via xdp_scrub_frame(), and allows the SKB to use this area. The fix in this patch is to do as veth does and instead allow the SKB to (re)use the area occupied by xdp_frame, by clearing via xdp_scrub_frame(). (This does kill the idea of the SKB being able to access (mem) info from this area, but I guess it was a bad idea anyhow, and it was already killed by the veth changes.) Fixes: 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8974b3755670e..3c18260403dde 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work) static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { + unsigned int hard_start_headroom; unsigned int frame_size; void *pkt_data_start; struct sk_buff *skb; + /* Part of headroom was reserved to xdpf */ + hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; + /* build_skb need to place skb_shared_info after SKB end, and * also want to know the memory "truesize". Thus, need to * know the memory frame size backing xdp_buff. @@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * is not at a fixed memory location, with mixed length * packets, which is bad for cache-line hotness.
*/ - frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + + frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - pkt_data_start = xdpf->data - xdpf->headroom; + pkt_data_start = xdpf->data - hard_start_headroom; skb = build_skb(pkt_data_start, frame_size); if (!skb) return NULL; - skb_reserve(skb, xdpf->headroom); + skb_reserve(skb, hard_start_headroom); __skb_put(skb, xdpf->len); if (xdpf->metasize) skb_metadata_set(skb, xdpf->metasize); @@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * - RX ring dev queue index (skb_record_rx_queue) */ + /* Allow SKB to reuse area used by xdp_frame */ + xdp_scrub_frame(xdpf); + return skb; } -- GitLab From e8b26b2135dedc0284490bfeac06dfc4418d0105 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 19 Mar 2019 11:24:38 +0200 Subject: [PATCH 0346/1861] net/mlx5: Decrease default mr cache size Delete initialization of high order entries in mr cache to decrease initial memory footprint. When required, the administrator can populate the entries with memory keys via the /sys interface. This approach is very helpful to significantly reduce the per HW function memory footprint in virtualization environments such as SRIOV. Fixes: 9603b61de1ee ("mlx5: Move pci device handling from mlx5_ib to mlx5_core") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Reported-by: Shalom Toledo Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 70cc906a102b2..76716419370df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -164,26 +164,6 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, - .mr_cache[16] = { - .size = 8, - .limit = 4 - }, - .mr_cache[17] = { - .size = 8, - .limit = 4 - }, - .mr_cache[18] = { - .size = 8, - .limit = 4 - }, - .mr_cache[19] = { - .size = 4, - .limit = 2 - }, - .mr_cache[20] = { - .size = 4, - .limit = 2 - }, }, }; -- GitLab From bc87a0036826a37b43489b029af8143bd07c6cca Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Mon, 11 Mar 2019 11:56:34 +0200 Subject: [PATCH 0347/1861] net/mlx5e: Fix error handling when refreshing TIRs Previously, a false positive would be caught if the TIRs list is empty, since the err value was initialized to -ENOMEM, and was only updated if a TIR is refreshed. This is resolved by initializing the err value to zero. 
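A minimal sketch of the bug pattern described above (placeholder names, not the driver code):

int err = -ENOMEM;                   /* pre-set to an error value          */

list_for_each_entry(tir, &tirs_list, list)
        err = refresh_one(tir);      /* only assigned if the list is walked */

return err;                          /* empty list => spurious -ENOMEM     */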
Fixes: b676f653896a ("net/mlx5e: Refactor refresh TIRs") Signed-off-by: Gavi Teitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 3078491cc0d06..8100786f6fb5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -141,15 +141,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - int err = -ENOMEM; + int err = 0; u32 tirn = 0; int inlen; void *in; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + if (!in) { + err = -ENOMEM; goto out; + } if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, -- GitLab From 8998576bd9c695ef1297540a50d7b3abbc69286b Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Mon, 4 Feb 2019 09:45:47 +0000 Subject: [PATCH 0348/1861] net/mlx5e: Allow IPv4 ttl & IPv6 hop_limit rewrite for all L4 protocols For some protocols we are not allowing IP header rewrite offload, since the HW is not capable of properly adjusting the L4 checksum. However, TTL & HOPLIMIT modification can be done for all IP protocols, because they are not part of the pseudo header taken into account for the checksum. Fixes: 738678817573 ("drivers: net: use flow action infrastructure") Signed-off-by: Dmytro Linkin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 52 +++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b4967a0ff8c7b..2b85f93a1c52b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2158,6 +2158,52 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, return true; } +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool is_action_keys_supported(const struct flow_action_entry *act) +{ + u32 mask, offset; + u8 htype; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only.
+ */ + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) { + return true; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) { + return true; + } + } + return false; +} + static bool modify_header_match_supported(struct mlx5_flow_spec *spec, struct flow_action *flow_action, u32 actions, @@ -2165,9 +2211,9 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, { const struct flow_action_entry *act; bool modify_ip_header; - u8 htype, ip_proto; void *headers_v; u16 ethertype; + u8 ip_proto; int i; if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) @@ -2187,9 +2233,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, act->id != FLOW_ACTION_ADD) continue; - htype = act->mangle.htype; - if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 || - htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + if (is_action_keys_supported(act)) { modify_ip_header = true; break; } -- GitLab From 8e949363f017e2011464812a714fb29710fb95b4 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Tue, 19 Mar 2019 16:42:40 -0500 Subject: [PATCH 0349/1861] net: mlx5: Add a missing check on idr_find, free buf idr_find() can return a NULL value to 'flow' which is used without a check. The patch adds a check to avoid potential NULL pointer dereference. In case of mlx5_fpga_sbu_conn_sendmsg() failure, free buf allocated using kzalloc. Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Signed-off-by: Aditya Pakki Reviewed-by: Yuval Shaia Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 5cf5f2a9d51fe..8de64e88c670c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,15 +217,21 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + rcu_read_unlock(); + + if (!flow) { + WARN_ONCE(1, "Received NULL pointer for handle\n"); + return -EINVAL; + } + buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); mlx5_fpga_tls_flow_to_cmd(flow, cmd); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); @@ -238,6 +244,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, buf->complete = mlx_tls_kfree_complete; ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); + if (ret < 0) + kfree(buf); return ret; } -- GitLab From 80a2a9026b24c6bd34b8d58256973e22270bedec Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 11 Mar 2019 06:18:24 +0200 Subject: [PATCH 0350/1861] net/mlx5e: Add a lock on tir list Refresh tirs is looping over a global list of tirs while netdevs are adding and removing tirs from that list. That is why a lock is required. 
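The race being closed can be sketched as the following interleaving (illustrative, not driver code):

/*
 *  CPU0: mlx5e_refresh_tirs()              CPU1: mlx5e_destroy_tir()
 *    list_for_each_entry(tir, ...)           list_del(&tir->list);
 *      tir = next entry                      (tir storage torn down)
 *        ^ may follow a just-unlinked node
 *
 * Serializing both sides with mdev->mlx5e_res.td.list_lock, as done
 * below, makes the list walk safe.
 */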
Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring") Signed-off-by: Yuval Avnery Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 7 +++++++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 8100786f6fb5c..1539cf3de5dc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, if (err) return err; + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return 0; } @@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) { + mutex_lock(&mdev->mlx5e_res.td.list_lock); mlx5_core_destroy_tir(mdev, tir->tirn); list_del(&tir->list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); } static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, @@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) } INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + mutex_init(&mdev->mlx5e_res.td.list_lock); return 0; @@ -159,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in, inlen); @@ -170,6 +176,7 @@ out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 022541dc5dbfd..0d07296488448 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -594,6 +594,8 @@ enum mlx5_pagefault_type_flags { }; struct mlx5_td { + /* protects tirs list changes while tirs refresh */ + struct mutex list_lock; struct list_head tirs_list; u32 tdn; }; -- GitLab From 8d047bf56a2cc13d90e6a5074015d65045fd43e7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 28 Feb 2019 09:27:33 +0200 Subject: [PATCH 0351/1861] net/mlx5: ethtool, Fix type analysis of advertised link-mode Ethtool option set_link_ksettings allows setting of legacy link-modes or extended link-modes. Refine the decision of which type of link-modes is set. 
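To see why the old test was insufficient, consider a hedged example: ethtool link-mode bits numbered 64 and above live in the second word of the advertising bitmap, so a request setting only such a bit leaves advertising[0] small and an advertising[0]-only comparison misclassifies it:

/* Illustrative: a request using only a mode bit >= 64.
 *
 *   advertising[0] == 0  -> old check (advertising[0] > MLX5E_PTYS_EXT)
 *                           wrongly says "legacy"
 *   advertising[1] != 0  -> refined check below recognizes "extended"
 */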
Fixes: 6a897372417e ("net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a0987cc5fe4a1..561e36af8e77d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -997,8 +997,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, #define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1) - ext_requested = (link_ksettings->link_modes.advertising[0] > - MLX5E_PTYS_EXT); + ext_requested = !!(link_ksettings->link_modes.advertising[0] > + MLX5E_PTYS_EXT || + link_ksettings->link_modes.advertising[1]); ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); /*when ptys_extended_ethernet is set legacy link modes are deprecated */ -- GitLab From dd1b9e09c12b4231148f446c2eefd886ef6e3ddd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 28 Feb 2019 09:39:02 +0200 Subject: [PATCH 0352/1861] net/mlx5: ethtool, Allow legacy link-modes configuration via non-extended ptys Allow configuration of legacy link-modes even when extended link-modes are supported. This requires reading of legacy advertisement even when extended link-modes are supported. Since legacy and extended advertisement are mutually exclusive, wait for an empty reply from extended advertisement before reading legacy advertisement. Fixes: 6a897372417e ("net/mlx5: ethtool, Add ethtool support for 50Gbps per lane link modes") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/port.c | 3 -- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 47 ++++++++++++------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 122927f3a6005..d5e5afbdca6dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -96,9 +96,6 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, if (!eproto) return -EINVAL; - if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) - return -EOPNOTSUPP; - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 561e36af8e77d..5efce4a3ff796 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -603,16 +603,18 @@ static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static void ptys2ethtool_adver_link(struct mlx5_core_dev *mdev, - unsigned long *advertising_modes, - u32 eth_proto_cap) +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap, bool ext) { unsigned long proto_cap = eth_proto_cap; struct ptys2ethtool_config *table; u32 max_size; int proto; - mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ?
ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(advertising_modes, advertising_modes, table[proto].advertised, @@ -794,12 +796,12 @@ static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_cap, - u8 tx_pause, u8 rx_pause, - struct ethtool_link_ksettings *link_ksettings) +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) { unsigned long *advertising = link_ksettings->link_modes.advertising; - ptys2ethtool_adver_link(mdev, advertising, eth_proto_cap); + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); @@ -854,8 +856,9 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - ptys2ethtool_adver_link(mdev, lp_advertising, eth_proto_lp); + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, @@ -872,6 +875,7 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, u8 an_disable_admin; u8 an_status; u8 connector_type; + bool admin_ext; bool ext; int err; @@ -886,6 +890,19 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, eth_proto_capability); eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. + * admin_ext indicates how eth_proto_admin should be + * interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); @@ -899,7 +916,8 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); get_supported(mdev, eth_proto_cap, link_ksettings); - get_advertising(mdev, eth_proto_admin, tx_pause, rx_pause, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -1001,16 +1019,13 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, MLX5E_PTYS_EXT || link_ksettings->link_modes.advertising[1]); ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - - /*when ptys_extended_ethernet is set legacy link modes are deprecated */ - if (ext_requested != ext_supported) - return -EPROTONOSUPPORT; + ext_requested &= ext_supported; speed = link_ksettings->base.speed; ethtool2ptys_adver_func = ext_requested ? 
mlx5e_ethtool2ptys_ext_adver_link : mlx5e_ethtool2ptys_adver_link; - err = mlx5_port_query_eth_proto(mdev, 1, ext_supported, &eproto); + err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); @@ -1038,7 +1053,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_supported); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested); mlx5_toggle_port_link(mdev); out: -- GitLab From 8a91ad9355c66c5026d3d911b434a25408ab876c Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 7 Mar 2019 09:27:18 +0200 Subject: [PATCH 0353/1861] net/mlx5: E-Switch, Fix access to invalid memory when toggling esw modes The esw fdb table has a union of legacy and offloads members. So if we were in a certain esw mode we could set some members and never reset them to NULL, which is fine since the destroy path doesn't care. But then, moving from legacy to switchdev a second time, the cleanup flow of legacy mode checks whether a struct member was in use by testing it for non-NULL, so we need to make sure to reset these members to NULL when we init legacy mode.
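A sketch of the sharing that makes this dangerous (an assumed shape, abridged; not the exact kernel definition):

struct esw_fdb_table_sketch {
        union {
                struct legacy_fdb   legacy;    /* legacy SR-IOV mode state */
                struct offloads_fdb offloads;  /* switchdev mode state     */
        };
};
/* After a mode toggle, stale offloads pointers alias the legacy fields
 * unless the union is zeroed on legacy init, as the hunk below does. */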
Fixes: c500c86b0c75d ("net/mlx5e: support for two independent packet edit actions") Cc: Pablo Neira Ayuso Signed-off-by: Tonghao Zhang Reviewed-by: Roi Dayan Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2b85f93a1c52b..5fb5cab36bf65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2701,7 +2701,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { - err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, parse_attr, hdrs, extack); if (err) return err; -- GitLab From 5c1d260ed10cf08dd7a0299c103ad0a3f9a9f7a1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 21 Mar 2019 15:51:35 -0700 Subject: [PATCH 0355/1861] net/mlx5: E-Switch, Protect from invalid memory access in offload fdb table The esw offloads structures share a union with the legacy mode structs. Reset the offloads struct to zero in init to protect from null assumptions made by the legacy mode code. Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f2260391be5b9..9b2d78ee22b88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1611,6 +1611,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) { int err; + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_offloads_fdb_tables(esw, nvports); -- GitLab From eca4a928585ac08147e5cc8e2111ecbc6279ee31 Mon Sep 17 00:00:00 2001 From: Omri Kahalon Date: Sun, 24 Feb 2019 16:31:08 +0200 Subject: [PATCH 0356/1861] net/mlx5: E-Switch, Fix esw manager vport indication for more vport commands Traditionally, the PF (Physical Function) which resides on vport 0 was the E-switch manager. Since the ECPF (Embedded CPU Physical Function), which resides on vport 0xfffe, was introduced as the E-Switch manager, the assumption that the E-switch manager is on vport 0 is incorrect. Since the eswitch code already uses the actual vport value, all we need is to always set other_vport=1. 
Signed-off-by: Omri Kahalon Reviewed-by: Max Gurtovoy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1a3b4d5d77fb4..c938954599c90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -105,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -134,8 +133,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -- GitLab From 36acf63a066f7e095ef2322118c2742a177daa65 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 22 Mar 2019 09:42:08 -0500 Subject: [PATCH 0357/1861] net/mlx5: E-Switch, fix syndrome (0x678139) when turn on vepa Make sure the struct mlx5_flow_destination is zero before filling in the field. Fixes: 8da202b24913 ("net/mlx5: E-Switch, Add support for VEPA in legacy mode.") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c938954599c90..8a67fd197b792 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2157,6 +2157,7 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, /* Star rule to forward all traffic to uplink vport */ memset(spec, 0, sizeof(*spec)); + memset(&dest, 0, sizeof(dest)); dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = MLX5_VPORT_UPLINK; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; -- GitLab From 5ec983e924c7978aaec3cf8679ece9436508bb20 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 7 Mar 2019 14:49:50 -0600 Subject: [PATCH 0358/1861] net/mlx5e: Update xoff formula Set minimum speed in xoff threshold formula to 40Gbps Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index eac245a93f918..f00de0c987cd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -122,7 +122,9 @@ out: return err; } -/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum 
speed value is 40Gbps + */ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) { u32 speed; @@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); - if (err) { - mlx5_core_warn(priv->mdev, "cannot get port speed\n"); - return 0; - } + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; -- GitLab From e28408e98bced123038857b6e3c81fa12a2e3e68 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 7 Mar 2019 14:07:32 -0600 Subject: [PATCH 0359/1861] net/mlx5e: Update xon formula Set xon = xoff - netdev's max_mtu. netdev's max_mtu will give enough time for the pause frame to arrive at the sender. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/port_buffer.c | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index f00de0c987cd6..4ab0d030b5448 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -143,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int mtu) + u32 xoff, unsigned int max_mtu) { int i; @@ -155,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) return -ENOMEM; port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; - port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; } return 0; @@ -167,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy() - * mtu: device's MTU + * max_mtu: netdev's max_mtu * pfc_en: current pfc configuration * buffer: current prio to buffer mapping * xoff: xoff value @@ -184,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * Return 0 if no error. * Set change to true if buffer configuration is modified. 
*/ -static int update_buffer_lossy(unsigned int mtu, +static int update_buffer_lossy(unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -221,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu); if (err) return err; @@ -231,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu, return 0; } +#define MINIMUM_MAX_MTU 9216 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, @@ -242,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, bool update_prio2buffer = false; u8 buffer[MLX5E_MAX_PRIORITY]; bool update_buffer = false; + unsigned int max_mtu; u32 total_used = 0; u8 curr_pfc_en; int err; int i; mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); if (err) @@ -255,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -265,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -277,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff, - &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + xoff, &port_buffer, &update_buffer); if (err) return err; } @@ -302,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -310,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } -- GitLab From 7f1a546e322287ae948e0f5eb8d12b7b638d93a6 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 18 Mar 2019 09:25:59 +0000 Subject: [PATCH 0360/1861] net/mlx5e: Consider tunnel type for encap contexts The driver allocates an encap context based on the tunnel properties, and reuse that context for all flows using the same tunnel properties. Commit df2ef3bff193 ("net/mlx5e: Add GRE protocol offloading") introduced another tunnel protocol other than the single VXLAN previously supported. A flow that uses a tunnel with the same tunnel properties but with a different tunnel type (GRE vs VXLAN for example) would mistakenly reuse the previous alocated context, causing the traffic to be sent with the wrong encapsulation. Fix that by considering the tunnel type for encap contexts. 
Fixes: df2ef3bff193 ("net/mlx5e: Add GRE protocol offloading") Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5fb5cab36bf65..d75dc44eb2ff6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2384,15 +2384,22 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return 0; } -static inline int cmp_encap_info(struct ip_tunnel_key *a, - struct ip_tunnel_key *b) +struct encap_key { + struct ip_tunnel_key *ip_tun_key; + int tunnel_type; +}; + +static inline int cmp_encap_info(struct encap_key *a, + struct encap_key *b) { - return memcmp(a, b, sizeof(*a)); + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tunnel_type != b->tunnel_type; } -static inline int hash_encap_info(struct ip_tunnel_key *key) +static inline int hash_encap_info(struct encap_key *key) { - return jhash(key, sizeof(*key), 0); + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tunnel_type); } @@ -2423,7 +2430,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; struct ip_tunnel_info *tun_info; - struct ip_tunnel_key *key; + struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; @@ -2433,13 +2440,16 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; tun_info = &parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); - key = &tun_info->key; + key.ip_tun_key = &tun_info->key; + key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); - hash_key = hash_encap_info(key); + hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info.key, key)) { + e_key.ip_tun_key = &e->tun_info.key; + e_key.tunnel_type = e->tunnel_type; + if (!cmp_encap_info(&e_key, &key)) { found = true; break; } -- GitLab From 18bebc6dd3281955240062655a4df35eef2c46b3 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Thu, 28 Mar 2019 13:29:21 +0300 Subject: [PATCH 0361/1861] bonding: show full hw address in sysfs for slave entries Bond expects ethernet hwaddr for its slave, but it can be longer than 6 bytes - infiniband interface for example. # cat /sys/devices//net/ib0/address 80:00:02:08:fe:80:00:00:00:00:00:00:7c:fe:90:03:00:be:5d:e1 # cat /sys/devices//net/ib0/bonding_slave/perm_hwaddr 80:00:02:08:fe:80 So print full hwaddr in sysfs "bonding_slave/perm_hwaddr" as well. Signed-off-by: Konstantin Khorenko Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_sysfs_slave.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 2f120b2ffef0c..4985268e22733 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -55,7 +55,9 @@ static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { - return sprintf(buf, "%pM\n", slave->perm_hwaddr); + return sprintf(buf, "%*phC\n", + slave->dev->addr_len, + slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); -- GitLab From 1b704c4a1ba95574832e730f23817b651db2aa59 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 28 Mar 2019 19:40:36 +0000 Subject: [PATCH 0362/1861] hv_netvsc: Fix unwanted wakeup after tx_disable After queue stopped, the wakeup mechanism may wake it up again when ring buffer usage is lower than a threshold. This may cause send path panic on NULL pointer when we stopped all tx queues in netvsc_detach and start removing the netvsc device. This patch fix it by adding a tx_disable flag to prevent unwanted queue wakeup. Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Reported-by: Mohammed Gamal Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc.c | 6 ++++-- drivers/net/hyperv/netvsc_drv.c | 32 ++++++++++++++++++++++++++------ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index e859ae2e42d5a..49f41b64077bb 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -987,6 +987,7 @@ struct netvsc_device { wait_queue_head_t wait_drain; bool destroy; + bool tx_disable; /* if true, do not wake up queue again */ /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 813d195bbd57f..e0dce373cdd9d 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -110,6 +110,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; + net_device->tx_disable = false; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; @@ -719,7 +720,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev, } else { struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx); - if (netif_tx_queue_stopped(txq) && + if (netif_tx_queue_stopped(txq) && !net_device->tx_disable && (hv_get_avail_to_write_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) { netif_tx_wake_queue(txq); @@ -874,7 +875,8 @@ static inline int netvsc_send_pkt( } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); ndev_ctx->eth_stats.stop_queue++; - if (atomic_read(&nvchan->queue_sends) < 1) { + if (atomic_read(&nvchan->queue_sends) < 1 && + !net_device->tx_disable) { netif_tx_wake_queue(txq); ndev_ctx->eth_stats.wake_queue++; ret = -ENOSPC; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index cf4897043e833..b20fb0fb595bd 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -109,6 +109,15 @@ static void netvsc_set_rx_mode(struct net_device *net) rcu_read_unlock(); } +static void netvsc_tx_enable(struct netvsc_device *nvscdev, + struct net_device *ndev) +{ + nvscdev->tx_disable = false; + virt_wmb(); /* ensure queue 
wake up mechanism is on */ + + netif_tx_wake_all_queues(ndev); +} + static int netvsc_open(struct net_device *net) { struct net_device_context *ndev_ctx = netdev_priv(net); @@ -129,7 +138,7 @@ static int netvsc_open(struct net_device *net) rdev = nvdev->extension; if (!rdev->link_state) { netif_carrier_on(net); - netif_tx_wake_all_queues(net); + netvsc_tx_enable(nvdev, net); } if (vf_netdev) { @@ -184,6 +193,17 @@ static int netvsc_wait_until_empty(struct netvsc_device *nvdev) } } +static void netvsc_tx_disable(struct netvsc_device *nvscdev, + struct net_device *ndev) +{ + if (nvscdev) { + nvscdev->tx_disable = true; + virt_wmb(); /* ensure txq will not wake up after stop */ + } + + netif_tx_disable(ndev); +} + static int netvsc_close(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); @@ -192,7 +212,7 @@ static int netvsc_close(struct net_device *net) struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); int ret; - netif_tx_disable(net); + netvsc_tx_disable(nvdev, net); /* No need to close rndis filter if it is removed already */ if (!nvdev) @@ -920,7 +940,7 @@ static int netvsc_detach(struct net_device *ndev, /* If device was up (receiving) then shutdown */ if (netif_running(ndev)) { - netif_tx_disable(ndev); + netvsc_tx_disable(nvdev, ndev); ret = rndis_filter_close(nvdev); if (ret) { @@ -1908,7 +1928,7 @@ static void netvsc_link_change(struct work_struct *w) if (rdev->link_state) { rdev->link_state = false; netif_carrier_on(net); - netif_tx_wake_all_queues(net); + netvsc_tx_enable(net_device, net); } else { notify = true; } @@ -1918,7 +1938,7 @@ static void netvsc_link_change(struct work_struct *w) if (!rdev->link_state) { rdev->link_state = true; netif_carrier_off(net); - netif_tx_stop_all_queues(net); + netvsc_tx_disable(net_device, net); } kfree(event); break; @@ -1927,7 +1947,7 @@ static void netvsc_link_change(struct work_struct *w) if (!rdev->link_state) { rdev->link_state = true; netif_carrier_off(net); - netif_tx_stop_all_queues(net); + netvsc_tx_disable(net_device, net); event->event = RNDIS_STATUS_MEDIA_CONNECT; spin_lock_irqsave(&ndev_ctx->lock, flags); list_add(&event->list, &ndev_ctx->reconfig_events); -- GitLab From c43ac97bac987e56c179598ce3398a95d55067bc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 28 Mar 2019 14:54:43 -0700 Subject: [PATCH 0363/1861] net: tls: prevent false connection termination with offload Only decrypt_internal() performs zero copy on rx, all paths which don't hit decrypt_internal() must set zc to false, otherwise tls_sw_recvmsg() may return 0 causing the application to believe that that connection got closed. Currently this happens with device offload when new record is first read from. Fixes: d069b780e367 ("tls: Fix tls_device receive") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reported-by: David Beckett Signed-off-by: David S. 
Miller --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 425351ac2a9b1..20b1912279694 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1484,6 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; } + } else { + *zc = false; } rxm->full_len -= padding_length(ctx, tls_ctx, skb); -- GitLab From 3d8830266ffc28c16032b859e38a0252e014b631 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 29 Mar 2019 09:18:02 +0800 Subject: [PATCH 0364/1861] net: ethtool: not call vzalloc for zero sized memory request NULL or ZERO_SIZE_PTR will be returned for zero sized memory request, and derefencing them will lead to a segfault so it is unnecessory to call vzalloc for zero sized memory request and not call functions which maybe derefence the NULL allocated memory this also fixes a possible memory leak if phy_ethtool_get_stats returns error, memory should be freed before exit Signed-off-by: Li RongQing Reviewed-by: Wang Li Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/ethtool.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b1eb324197321..36ed619faf364 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1797,11 +1797,16 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) WARN_ON_ONCE(!ret); gstrings.len = ret; - data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); - if (gstrings.len && !data) - return -ENOMEM; - __ethtool_get_strings(dev, gstrings.string_set, data); + if (gstrings.len) { + data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); + if (!data) + return -ENOMEM; + + __ethtool_get_strings(dev, gstrings.string_set, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1897,11 +1902,15 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - ops->get_ethtool_stats(dev, &stats, data); + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + ops->get_ethtool_stats(dev, &stats, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) @@ -1941,16 +1950,21 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(array_size(n_stats, sizeof(u64))); - if (n_stats && !data) - return -ENOMEM; - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); - if (ret < 0) - return ret; + if (n_stats) { + data = vzalloc(array_size(n_stats, sizeof(u64))); + if (!data) + return -ENOMEM; + + if (dev->phydev && !ops->get_ethtool_phy_stats) { + ret = phy_ethtool_get_stats(dev->phydev, &stats, data); + if (ret < 0) + goto out; + } else { + ops->get_ethtool_phy_stats(dev, &stats, data); + } } else { - ops->get_ethtool_phy_stats(dev, &stats, data); + data = NULL; } ret = -EFAULT; -- GitLab From 4d31c4fa3f9ef7b7e2e79fd57d21290f64c938f5 Mon Sep 17 00:00:00 2001 From: Vishal Kulkarni Date: Fri, 29 Mar 2019 16:56:09 +0530 Subject: [PATCH 0365/1861] cxgb4: Update 1.23.3.0 as the latest firmware supported. Change t4fw_version.h to update latest firmware version number to 1.23.3.0. 
Signed-off-by: Vishal Kulkarni Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index 9125ddd89dd12..a02b1dff403ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x16 -#define T4FW_VERSION_MICRO 0x09 +#define T4FW_VERSION_MINOR 0x17 +#define T4FW_VERSION_MICRO 0x03 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x16 -#define T5FW_VERSION_MICRO 0x09 +#define T5FW_VERSION_MINOR 0x17 +#define T5FW_VERSION_MICRO 0x03 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x16 -#define T6FW_VERSION_MICRO 0x09 +#define T6FW_VERSION_MINOR 0x17 +#define T6FW_VERSION_MICRO 0x03 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- GitLab From ec915f4744a0a556090874a4a78e85afea77471a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Mar 2019 13:47:14 -0700 Subject: [PATCH 0366/1861] Revert "cxgb4: Update 1.23.3.0 as the latest firmware supported." This reverts commit 4d31c4fa3f9ef7b7e2e79fd57d21290f64c938f5. Accidently applied this to the wrong tree. Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index a02b1dff403ec..9125ddd89dd12 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x17 -#define T4FW_VERSION_MICRO 0x03 +#define T4FW_VERSION_MINOR 0x16 +#define T4FW_VERSION_MICRO 0x09 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x17 -#define T5FW_VERSION_MICRO 0x03 +#define T5FW_VERSION_MINOR 0x16 +#define T5FW_VERSION_MICRO 0x09 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x17 -#define T6FW_VERSION_MICRO 0x03 +#define T6FW_VERSION_MINOR 0x16 +#define T6FW_VERSION_MICRO 0x09 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- GitLab From 8fa76162487143d202db20ce84e12061b671a058 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Fri, 29 Mar 2019 15:12:53 -0500 Subject: [PATCH 0367/1861] KVM: arm/arm64: arch_timer: Fix CNTP_TVAL calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently the generic timer test of kvm-unit-tests failed to complete (stalled) when a physical timer is being used. This issue is caused by incorrect update of CNTP_CVAL when CNTP_TVAL is being accessed, introduced by 'Commit 84135d3d18da ("KVM: arm/arm64: consolidate arch timer trap handlers")'. 
According to Arm ARM, the read/write behavior of accesses to the TVAL registers is expected to be: * READ: TimerValue = (CompareValue – (Counter - Offset) * WRITE: CompareValue = ((Counter - Offset) + Sign(TimerValue) This patch fixes the TVAL read/write code path according to the specification. Fixes: 84135d3d18da ("KVM: arm/arm64: consolidate arch timer trap handlers") Signed-off-by: Wei Huang [maz: commit message tidy-up] Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 3417f2dbc3667..d43308dc36179 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -812,7 +812,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: - val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval; + val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; break; case TIMER_REG_CTL: @@ -858,7 +858,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff; + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val; break; case TIMER_REG_CTL: -- GitLab From 037c8489ade669e0f09ad40d5b91e5e1159a14b1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 27 Mar 2019 11:10:44 -0700 Subject: [PATCH 0368/1861] libnvdimm/security: provide fix for secure-erase to use zero-key Add a zero key in order to standardize hardware that want a key of 0's to be passed. Some platforms defaults to a zero-key with security enabled rather than allow the OS to enable the security. The zero key would allow us to manage those platform as well. This also adds a fix to secure erase so it can use the zero key to do crypto erase. Some other security commands already use zero keys. This introduces a standard zero-key to allow unification of semantics cross nvdimm security commands. 
Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/security.c | 17 ++++++++++++----- tools/testing/nvdimm/test/nfit.c | 11 +++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index f8bb746a549f7..6bea6852bf278 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -22,6 +22,8 @@ static bool key_revalidate = true; module_param(key_revalidate, bool, 0444); MODULE_PARM_DESC(key_revalidate, "Require key validation at init."); +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static void *key_data(struct key *key) { struct encrypted_key_payload *epayload = dereference_key_locked(key); @@ -286,8 +288,9 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key; + struct key *key = NULL; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -319,11 +322,15 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -EOPNOTSUPP; } - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; + if (keyid != 0) { + key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); + if (!key) + return -ENOKEY; + data = key_data(key); + } else + data = zero_key; - rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type); + rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type); dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), pass_type == NVDIMM_MASTER ? "(master)" : "(user)", rc == 0 ? "success" : "fail"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b579f962451d6..cad719876ef45 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -225,6 +225,8 @@ static struct workqueue_struct *nfit_wq; static struct gen_pool *nfit_pool; +static const char zero_key[NVDIMM_PASSPHRASE_LEN]; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1059,8 +1061,7 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, struct device *dev = &t->pdev.dev; struct nfit_test_sec *sec = &dimm_sec_info[dimm]; - if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) || - (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + if (sec->state & ND_INTEL_SEC_STATE_FROZEN) { nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; dev_dbg(dev, "secure erase: wrong security state\n"); } else if (memcmp(nd_cmd->passphrase, sec->passphrase, @@ -1068,6 +1069,12 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; dev_dbg(dev, "secure erase: wrong passphrase\n"); } else { + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) + && (memcmp(nd_cmd->passphrase, zero_key, + ND_INTEL_PASSPHRASE_SIZE) != 0)) { + dev_dbg(dev, "invalid zero key\n"); + return 0; + } memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); sec->state = 0; -- GitLab From d2e5b6436c28e7ee4988497d31122e06217876fb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 27 Mar 2019 11:12:45 -0700 Subject: [PATCH 0369/1861] libnvdimm/security, acpi/nfit: unify zero-key for all security commands With zero-key defined, we can remove previous detection of key id 0 or null key in order to deal with a zero-key situation. 
Syncing all security commands to use the zero-key. Helper functions are introduced to return the data that points to the actual key payload or the zero_key. This helps uniformly handle the key material even with zero_key. Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/intel.c | 10 ++-- drivers/nvdimm/security.c | 117 ++++++++++++++++++++++---------------- 2 files changed, 73 insertions(+), 54 deletions(-) diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c index f70de71f79d6a..cddd0fcf622c3 100644 --- a/drivers/acpi/nfit/intel.c +++ b/drivers/acpi/nfit/intel.c @@ -122,9 +122,8 @@ static int intel_security_change_key(struct nvdimm *nvdimm, if (!test_bit(cmd, &nfit_mem->dsm_mask)) return -ENOTTY; - if (old_data) - memcpy(nd_cmd.cmd.old_pass, old_data->data, - sizeof(nd_cmd.cmd.old_pass)); + memcpy(nd_cmd.cmd.old_pass, old_data->data, + sizeof(nd_cmd.cmd.old_pass)); memcpy(nd_cmd.cmd.new_pass, new_data->data, sizeof(nd_cmd.cmd.new_pass)); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); @@ -336,9 +335,8 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm, /* flush all cache before we erase DIMM */ nvdimm_invalidate_cache(); - if (nkey) - memcpy(nd_cmd.cmd.passphrase, nkey->data, - sizeof(nd_cmd.cmd.passphrase)); + memcpy(nd_cmd.cmd.passphrase, nkey->data, + sizeof(nd_cmd.cmd.passphrase)); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); if (rc < 0) return rc; diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 6bea6852bf278..a570f2263a424 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -77,6 +77,16 @@ static struct key *nvdimm_request_key(struct nvdimm *nvdimm) return key; } +static const void *nvdimm_get_key_payload(struct nvdimm *nvdimm, + struct key **key) +{ + *key = nvdimm_request_key(nvdimm); + if (!*key) + return zero_key; + + return key_data(*key); +} + static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, key_serial_t id, int subclass) { @@ -107,36 +117,57 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, return key; } -static struct key *nvdimm_key_revalidate(struct nvdimm *nvdimm) +static const void *nvdimm_get_user_key_payload(struct nvdimm *nvdimm, + key_serial_t id, int subclass, struct key **key) +{ + *key = NULL; + if (id == 0) { + if (subclass == NVDIMM_BASE_KEY) + return zero_key; + else + return NULL; + } + + *key = nvdimm_lookup_user_key(nvdimm, id, subclass); + if (!*key) + return NULL; + + return key_data(*key); +} + + +static int nvdimm_key_revalidate(struct nvdimm *nvdimm) { struct key *key; int rc; + const void *data; if (!nvdimm->sec.ops->change_key) - return NULL; + return -EOPNOTSUPP; - key = nvdimm_request_key(nvdimm); - if (!key) - return NULL; + data = nvdimm_get_key_payload(nvdimm, &key); /* * Send the same key to the hardware as new and old key to * verify that the key is good. 
*/ - rc = nvdimm->sec.ops->change_key(nvdimm, key_data(key), - key_data(key), NVDIMM_USER); + rc = nvdimm->sec.ops->change_key(nvdimm, data, data, NVDIMM_USER); if (rc < 0) { nvdimm_put_key(key); - key = NULL; + return rc; } - return key; + + nvdimm_put_key(key); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + return 0; } static int __nvdimm_security_unlock(struct nvdimm *nvdimm) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key = NULL; + struct key *key; + const void *data; int rc; /* The bus lock should be held at the top level of the call stack */ @@ -162,16 +193,11 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) if (!key_revalidate) return 0; - key = nvdimm_key_revalidate(nvdimm); - if (!key) - return nvdimm_security_freeze(nvdimm); + return nvdimm_key_revalidate(nvdimm); } else - key = nvdimm_request_key(nvdimm); + data = nvdimm_get_key_payload(nvdimm, &key); - if (!key) - return -ENOKEY; - - rc = nvdimm->sec.ops->unlock(nvdimm, key_data(key)); + rc = nvdimm->sec.ops->unlock(nvdimm, data); dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); @@ -197,6 +223,7 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct key *key; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -216,11 +243,12 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid) return -EBUSY; } - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) return -ENOKEY; - rc = nvdimm->sec.ops->disable(nvdimm, key_data(key)); + rc = nvdimm->sec.ops->disable(nvdimm, data); dev_dbg(dev, "key: %d disable: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); @@ -237,6 +265,7 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct key *key, *newkey; int rc; + const void *data, *newdata; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -251,22 +280,19 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid, return -EIO; } - if (keyid == 0) - key = NULL; - else { - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; - } + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) + return -ENOKEY; - newkey = nvdimm_lookup_user_key(nvdimm, new_keyid, NVDIMM_NEW_KEY); - if (!newkey) { + newdata = nvdimm_get_user_key_payload(nvdimm, new_keyid, + NVDIMM_NEW_KEY, &newkey); + if (!newdata) { nvdimm_put_key(key); return -ENOKEY; } - rc = nvdimm->sec.ops->change_key(nvdimm, key ? key_data(key) : NULL, - key_data(newkey), pass_type); + rc = nvdimm->sec.ops->change_key(nvdimm, data, newdata, pass_type); dev_dbg(dev, "key: %d %d update%s: %s\n", key_serial(key), key_serial(newkey), pass_type == NVDIMM_MASTER ? 
"(master)" : "(user)", @@ -322,13 +348,10 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -EOPNOTSUPP; } - if (keyid != 0) { - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; - data = key_data(key); - } else - data = zero_key; + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) + return -ENOKEY; rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type); dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), @@ -344,8 +367,9 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - struct key *key; + struct key *key = NULL; int rc; + const void *data; /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); @@ -375,15 +399,12 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) return -EBUSY; } - if (keyid == 0) - key = NULL; - else { - key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY); - if (!key) - return -ENOKEY; - } + data = nvdimm_get_user_key_payload(nvdimm, keyid, + NVDIMM_BASE_KEY, &key); + if (!data) + return -ENOKEY; - rc = nvdimm->sec.ops->overwrite(nvdimm, key ? key_data(key) : NULL); + rc = nvdimm->sec.ops->overwrite(nvdimm, data); dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); -- GitLab From 288ac524cf70a8e7ed851a61ed2a9744039dae8d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 30 Mar 2019 17:13:24 +0100 Subject: [PATCH 0370/1861] r8169: disable default rx interrupt coalescing on RTL8168 It was reported that re-introducing ASPM, in combination with RX interrupt coalescing, results in significantly increased packet latency, see [0]. Disabling ASPM or RX interrupt coalescing fixes the issue. Therefore change the driver's default to disable RX interrupt coalescing. Users still have the option to enable RX coalescing via ethtool. [0] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=925496 Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") Reported-by: Mike Crowe Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7562ccbbb39af..19efa88f3f02d 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5460,7 +5460,7 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp) tp->cp_cmd |= PktCntrDisable | INTT_1; RTL_W16(tp, CPlusCmd, tp->cp_cmd); - RTL_W16(tp, IntrMitigate, 0x5151); + RTL_W16(tp, IntrMitigate, 0x5100); /* Work around for RxFIFO overflow. */ if (tp->mac_version == RTL_GIGA_MAC_VER_11) { -- GitLab From 7f75591fc5a123929a29636834d1bcb8b5c9fee3 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 25 Mar 2019 14:01:23 +0100 Subject: [PATCH 0371/1861] iio: core: fix a possible circular locking dependency This fixes a possible circular locking dependency detected warning seen with: - CONFIG_PROVE_LOCKING=y - consumer/provider IIO devices (ex: "voltage-divider" consumer of "adc") When using the IIO consumer interface, e.g. iio_channel_get(), the consumer device will likely call iio_read_channel_raw() or similar that rely on 'info_exist_lock' mutex. typically: ... 
	mutex_lock(&chan->indio_dev->info_exist_lock);
	if (chan->indio_dev->info == NULL) {
		ret = -ENODEV;
		goto err_unlock;
	}
	ret = do_some_ops();
err_unlock:
	mutex_unlock(&chan->indio_dev->info_exist_lock);
	return ret;
	...

The same mutex is also held in iio_device_unregister().

The following deadlock warning happens when:
- the consumer device has called an API like iio_read_channel_raw()
  at least once.
- the consumer driver is unregistered, removed (unbind from sysfs)

======================================================
WARNING: possible circular locking dependency detected
4.19.24 #577 Not tainted
------------------------------------------------------
sh/372 is trying to acquire lock:
(kn->count#30){++++}, at: kernfs_remove_by_name_ns+0x3c/0x84

but task is already holding lock:
(&dev->info_exist_lock){+.+.}, at: iio_device_unregister+0x18/0x60

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&dev->info_exist_lock){+.+.}:
       __mutex_lock+0x70/0xa3c
       mutex_lock_nested+0x1c/0x24
       iio_read_channel_raw+0x1c/0x60
       iio_read_channel_info+0xa8/0xb0
       dev_attr_show+0x1c/0x48
       sysfs_kf_seq_show+0x84/0xec
       seq_read+0x154/0x528
       __vfs_read+0x2c/0x15c
       vfs_read+0x8c/0x110
       ksys_read+0x4c/0xac
       ret_fast_syscall+0x0/0x28
       0xbedefb60

-> #0 (kn->count#30){++++}:
       lock_acquire+0xd8/0x268
       __kernfs_remove+0x288/0x374
       kernfs_remove_by_name_ns+0x3c/0x84
       remove_files+0x34/0x78
       sysfs_remove_group+0x40/0x9c
       sysfs_remove_groups+0x24/0x34
       device_remove_attrs+0x38/0x64
       device_del+0x11c/0x360
       cdev_device_del+0x14/0x2c
       iio_device_unregister+0x24/0x60
       release_nodes+0x1bc/0x200
       device_release_driver_internal+0x1a0/0x230
       unbind_store+0x80/0x130
       kernfs_fop_write+0x100/0x1e4
       __vfs_write+0x2c/0x160
       vfs_write+0xa4/0x17c
       ksys_write+0x4c/0xac
       ret_fast_syscall+0x0/0x28
       0xbe906840

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&dev->info_exist_lock);
                               lock(kn->count#30);
                               lock(&dev->info_exist_lock);
  lock(kn->count#30);

 *** DEADLOCK ***
...

cdev_device_del() can be called without holding the lock. It should be
safe as info_exist_lock prevents kernel-space consumers from using the
exported routines during/after provider removal. cdev_device_del() is
for userspace.

Help to reproduce:
See example: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt

	sysv {
		compatible = "voltage-divider";
		io-channels = <&adc 0>;
		output-ohms = <22>;
		full-ohms = <222>;
	};

First, go to iio:deviceX for the "voltage-divider", do one read:
$ cd /sys/bus/iio/devices/iio:deviceX
$ cat in_voltage0_raw

Then, unbind the consumer driver. It triggers the above deadlock warning.
$ cd /sys/bus/platform/drivers/iio-rescale/
$ echo sysv > unbind

Note I don't actually expect stable will pick this up all the way back
into IIO being in staging, but it's probably valid that far back.
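Distilled, the fix is purely an ordering change; a schematic of the two
orderings (simplified from the diff below, not literal kernel code):

	/* before: info_exist_lock (A) is held while cdev_device_del()
	 * tears down sysfs attributes and takes kn->count (B); the
	 * sysfs read path takes B then A, closing the A/B cycle */
	mutex_lock(&indio_dev->info_exist_lock);
	cdev_device_del(&indio_dev->chrdev, &indio_dev->dev);
	...
	mutex_unlock(&indio_dev->info_exist_lock);

	/* after: delete the char device first, then take the lock;
	 * B is never acquired while A is held */
	cdev_device_del(&indio_dev->chrdev, &indio_dev->dev);
	mutex_lock(&indio_dev->info_exist_lock);
	...
	mutex_unlock(&indio_dev->info_exist_lock);

Kernel-space consumers stay safe because iio_device_unregister() still
clears indio_dev->info under the mutex, which is what makes the
info == NULL check in the consumer path above return -ENODEV.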
Signed-off-by: Fabrice Gasnier Fixes: ac917a81117c ("staging:iio:core set the iio_dev.info pointer to null on unregister") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 4700fd5d8c90a..9c4d92115504a 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1743,10 +1743,10 @@ EXPORT_SYMBOL(__iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - mutex_lock(&indio_dev->info_exist_lock); - cdev_device_del(&indio_dev->chrdev, &indio_dev->dev); + mutex_lock(&indio_dev->info_exist_lock); + iio_device_unregister_debugfs(indio_dev); iio_disable_all_buffers(indio_dev); -- GitLab From 583e6361414903c5206258a30e5bd88cb03c0254 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:35 +0200 Subject: [PATCH 0372/1861] net: stmmac: use correct DMA buffer size in the RX descriptor We always program the maximum DMA buffer size into the receive descriptor, although the allocated size may be less. E.g. with the default MTU size we allocate only 1536 bytes. If somebody sends us a bigger frame, then memory may get corrupted. Fix by using exact buffer sizes. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/descs_com.h | 22 ++++++++++++------- .../ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- .../ethernet/stmicro/stmmac/dwxgmac2_descs.c | 2 +- .../net/ethernet/stmicro/stmmac/enh_desc.c | 10 ++++++--- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- .../net/ethernet/stmicro/stmmac/norm_desc.c | 10 ++++++--- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++-- 7 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index 40d6356a7e73c..3dfb07a789525 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -29,11 +29,13 @@ /* Specific functions used for Ring mode */ /* Enhanced descriptors */ -static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end, + int bfsize) { - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - << ERDES1_BUFFER2_SIZE_SHIFT) - & ERDES1_BUFFER2_SIZE_MASK); + if (bfsize == BUF_SIZE_16KiB) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB + << ERDES1_BUFFER2_SIZE_SHIFT) + & ERDES1_BUFFER2_SIZE_MASK); if (end) p->des1 |= cpu_to_le32(ERDES1_END_RING); @@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) } /* Normal descriptors */ -static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize) { - p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1) - << RDES1_BUFFER2_SIZE_SHIFT) - & RDES1_BUFFER2_SIZE_MASK); + if (bfsize >= BUF_SIZE_2KiB) { + int bfsize2; + + bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT) + & RDES1_BUFFER2_SIZE_MASK); + } if (end) p->des1 |= cpu_to_le32(RDES1_END_RING); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 7fbb6a4dbf510..e061e9f5fad71 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -296,7 +296,7 @@ exit: } 
static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwmac4_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 1d858fdec9971..98fa471da7c0f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -123,7 +123,7 @@ static int dwxgmac2_get_rx_timestamp_status(void *desc, void *next_desc, } static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { dwxgmac2_set_rx_owner(p, disable_rx_ic); } diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 5ef91a790f9d1..e8855e6adb481 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -259,15 +259,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_8KiB); + p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else - ehn_desc_rx_set_on_ring(p, end); + ehn_desc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 92b8944f26e3c..5bb00234d961c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -33,7 +33,7 @@ struct dma_extended_desc; struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode, - int end); + int end, int bfsize); /* DMA TX descriptor ring initialization */ void (*init_tx_desc)(struct dma_desc *p, int mode, int end); /* Invoked by the xmit function to prepare the tx descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index de65bb29feba9..c55a9815b394c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -135,15 +135,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end) + int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else - ndesc_rx_set_on_ring(p, end); + ndesc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6a2e1031a62ae..4e496cf655f28 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1136,11 +1136,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) if 
(priv->extend_desc)
 			stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic,
 					priv->use_riwt, priv->mode,
-					(i == DMA_RX_SIZE - 1));
+					(i == DMA_RX_SIZE - 1),
+					priv->dma_buf_sz);
 		else
 			stmmac_init_rx_desc(priv, &rx_q->dma_rx[i],
 					priv->use_riwt, priv->mode,
-					(i == DMA_RX_SIZE - 1));
+					(i == DMA_RX_SIZE - 1),
+					priv->dma_buf_sz);
 	}
 
 /**
--
GitLab

From 972c9be784e077bc56472c78243e0326e525b689 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Wed, 27 Mar 2019 22:35:36 +0200
Subject: [PATCH 0373/1861] net: stmmac: ratelimit RX error logs

Ratelimit RX error logs.

Signed-off-by: Aaro Koskinen
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4e496cf655f28..392d94cede175 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3433,9 +3433,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			 *  ignored
 			 */
 			if (frame_len > priv->dma_buf_sz) {
-				netdev_err(priv->dev,
-					   "len %d larger than size (%d)\n",
-					   frame_len, priv->dma_buf_sz);
+				if (net_ratelimit())
+					netdev_err(priv->dev,
+						   "len %d larger than size (%d)\n",
+						   frame_len, priv->dma_buf_sz);
 				priv->dev->stats.rx_length_errors++;
 				break;
 			}
@@ -3492,9 +3493,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			} else {
 				skb = rx_q->rx_skbuff[entry];
 				if (unlikely(!skb)) {
-					netdev_err(priv->dev,
-						   "%s: Inconsistent Rx chain\n",
-						   priv->dev->name);
+					if (net_ratelimit())
+						netdev_err(priv->dev,
+							   "%s: Inconsistent Rx chain\n",
+							   priv->dev->name);
 					priv->dev->stats.rx_dropped++;
 					break;
 				}
--
GitLab

From 07b3975352374c3f5ebb4a42ef0b253fe370542d Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Wed, 27 Mar 2019 22:35:37 +0200
Subject: [PATCH 0374/1861] net: stmmac: don't stop NAPI processing when
 dropping a packet

Currently, if we drop a packet, we exit from the NAPI loop before the
budget is consumed. In some situations this will make the RX processing
stall, e.g. when flood pinging the system with oversized packets, as
the erroneous packets are not dropped efficiently. If we drop a packet,
we should just continue to the next one as long as the budget allows.

Signed-off-by: Aaro Koskinen
Signed-off-by: David S.
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 392d94cede175..a26e36dbb5df0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3354,9 +3354,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; - unsigned int entry = rx_q->cur_rx; + unsigned int next_entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; - unsigned int next_entry; unsigned int count = 0; bool xmac; @@ -3374,10 +3373,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { - int status; + int entry, status; struct dma_desc *p; struct dma_desc *np; + entry = next_entry; + if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else @@ -3438,7 +3439,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "len %d larger than size (%d)\n", frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; - break; + continue; } /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 @@ -3473,7 +3474,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dev_warn(priv->device, "packet dropped\n"); priv->dev->stats.rx_dropped++; - break; + continue; } dma_sync_single_for_cpu(priv->device, @@ -3498,7 +3499,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "%s: Inconsistent Rx chain\n", priv->dev->name); priv->dev->stats.rx_dropped++; - break; + continue; } prefetch(skb->data - NET_IP_ALIGN); rx_q->rx_skbuff[entry] = NULL; @@ -3533,7 +3534,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } - entry = next_entry; } stmmac_rx_refill(priv, queue); -- GitLab From 1b746ce8b397e58f9e40ce5c63b7198de6930482 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 27 Mar 2019 22:35:38 +0200 Subject: [PATCH 0375/1861] net: stmmac: don't overwrite discard_frame status If we have error bits set, the discard_frame status will get overwritten by checksum bit checks, which might set the status back to good one. Fix by checking the COE status only if the frame is good. Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index e8855e6adb481..c42ef6c729c08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -231,9 +231,10 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, * It doesn't match with the information reported into the databook. * At any rate, we need to understand if the CSUM hw computation is ok * and report this info to the upper layers. 
 */
-	ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR),
-				 !!(rdes0 & RDES0_FRAME_TYPE),
-				 !!(rdes0 & ERDES0_RX_MAC_ADDR));
+	if (likely(ret == good_frame))
+		ret = enh_desc_coe_rdes0(!!(rdes0 & RDES0_IPC_CSUM_ERROR),
+					 !!(rdes0 & RDES0_FRAME_TYPE),
+					 !!(rdes0 & ERDES0_RX_MAC_ADDR));
 
 	if (unlikely(rdes0 & RDES0_DRIBBLING))
 		x->dribbling_bit++;
--
GitLab

From 8ac0c24fe1c256af6644caf3d311029440ec2fbd Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Wed, 27 Mar 2019 22:35:39 +0200
Subject: [PATCH 0376/1861] net: stmmac: fix dropping of multi-descriptor RX
 frames

Packets without the last descriptor set should be dropped early. If we
receive a frame larger than the DMA buffer, the HW will continue using
the next descriptor. The driver mistakes these for individual frames,
and sometimes a truncated frame (without the LD set) may look like a
valid packet.

This fixes a strange issue where the system replies to a 4098-byte ping
although the MTU/DMA buffer size is set to 4096, and yet at the same
time it's logging an oversized packet.

Signed-off-by: Aaro Koskinen
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index c42ef6c729c08..5202d6ad79194 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -201,6 +201,11 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 	if (unlikely(rdes0 & RDES0_OWN))
 		return dma_own;
 
+	if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) {
+		stats->rx_length_errors++;
+		return discard_frame;
+	}
+
 	if (unlikely(rdes0 & RDES0_ERROR_SUMMARY)) {
 		if (unlikely(rdes0 & RDES0_DESCRIPTOR_ERROR)) {
 			x->rx_desc++;
--
GitLab

From 057a0c5642a2ff2db7c421cdcde34294a23bf37b Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Wed, 27 Mar 2019 22:35:40 +0200
Subject: [PATCH 0377/1861] net: stmmac: don't log oversized frames

This log is harmful as it can trigger multiple times per packet.
Delete it.

Signed-off-by: Aaro Koskinen
Signed-off-by: David S.
Miller --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index c55a9815b394c..b7dd4e3c760d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -91,8 +91,6 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, return dma_own; if (unlikely(!(rdes0 & RDES0_LAST_DESCRIPTOR))) { - pr_warn("%s: Oversized frame spanned multiple buffers\n", - __func__); stats->rx_length_errors++; return discard_frame; } -- GitLab From 6f07e5f06c8712acc423485f657799fc8e11e56c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:08 +0800 Subject: [PATCH 0378/1861] tipc: check bearer name with right length in tipc_nl_compat_bearer_enable Syzbot reported the following crash: BUG: KMSAN: uninit-value in memchr+0xce/0x110 lib/string.c:961 memchr+0xce/0x110 lib/string.c:961 string_is_valid net/tipc/netlink_compat.c:176 [inline] tipc_nl_compat_bearer_enable+0x2c4/0x910 net/tipc/netlink_compat.c:401 __tipc_nl_compat_doit net/tipc/netlink_compat.c:321 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:354 tipc_nl_compat_handle net/tipc/netlink_compat.c:1162 [inline] tipc_nl_compat_recv+0x1ae7/0x2750 net/tipc/netlink_compat.c:1265 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] Uninit was created at: __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] It was triggered when the bearer name size < TIPC_MAX_BEARER_NAME, it would check with a wrong len/TLV_GET_DATA_LEN(msg->req), which also includes priority and disc_domain length. This patch is to fix it by checking it with a right length: 'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_bearer_config, name)'. Reported-by: syzbot+8b707430713eb46e1e45@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/tipc/netlink_compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4ad3586da8f02..5f8e53cca2220 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -397,7 +397,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(b->name, len)) return -EINVAL; -- GitLab From 8c63bf9ab4be8b83bd8c34aacfd2f1d2c8901c8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:09 +0800 Subject: [PATCH 0379/1861] tipc: check link name with right length in tipc_nl_compat_link_set A similar issue as fixed by Patch "tipc: check bearer name with right length in tipc_nl_compat_bearer_enable" was also found by syzbot in tipc_nl_compat_link_set(). The length to check with should be 'TLV_GET_DATA_LEN(msg->req) - offsetof(struct tipc_link_config, name)'. Reported-by: syzbot+de00a87b8644a582ae79@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 5f8e53cca2220..0bfd03d67fdd9 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -771,7 +771,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(lc->name, len)) return -EINVAL; -- GitLab From 2ac695d1d602ce00b12170242f58c3d3a8e36d04 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 22:50:10 +0800 Subject: [PATCH 0380/1861] tipc: handle the err returned from cmd header function Syzbot found a crash: BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872 Call Trace: tipc_nl_compat_name_table_dump+0x54f/0xcd0 net/tipc/netlink_compat.c:872 __tipc_nl_compat_dumpit+0x59e/0xda0 net/tipc/netlink_compat.c:215 tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:280 tipc_nl_compat_handle net/tipc/netlink_compat.c:1226 [inline] tipc_nl_compat_recv+0x1b5f/0x2750 net/tipc/netlink_compat.c:1265 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] Uninit was created at: __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] It was supposed to be fixed on commit 974cb0e3e7c9 ("tipc: 
fix uninit-value in tipc_nl_compat_name_table_dump") by checking TLV_GET_DATA_LEN(msg->req) in cmd->header()/tipc_nl_compat_name_table_dump_header(), which is called ahead of tipc_nl_compat_name_table_dump(). However, tipc_nl_compat_dumpit() doesn't handle the error returned from cmd header function. It means even when the check added in that fix fails, it won't stop calling tipc_nl_compat_name_table_dump(), and the issue will be triggered again. So this patch is to add the process for the err returned from cmd header function in tipc_nl_compat_dumpit(). Reported-by: syzbot+3ce8520484b0d4e260a5@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 0bfd03d67fdd9..340a6e7c43a7d 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -267,8 +267,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { -- GitLab From 4fdcfab5b5537c21891e22e65996d4d0dd8ab4ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:39:50 +0000 Subject: [PATCH 0381/1861] jffs2: fix use-after-free on symlink traversal free the symlink body after the same RCU delay we have for freeing the struct inode itself, so that traversal during RCU pathwalk wouldn't step into freed memory. Signed-off-by: Al Viro --- fs/jffs2/readinode.c | 5 ----- fs/jffs2/super.c | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 389ea53ea4875..bccfc40b3a74a 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -1414,11 +1414,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL); - if (f->target) { - kfree(f->target); - f->target = NULL; - } - fds = f->dents; while(fds) { fd = fds; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index bb6ae387469f4..05d892c79339f 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -47,7 +47,10 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb) static void jffs2_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + + kfree(f->target); + kmem_cache_free(jffs2_inode_cachep, f); } static void jffs2_destroy_inode(struct inode *inode) -- GitLab From 0cdc17ebd2072b6cdd3ec3695ea7ede745664a8b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:40:38 +0000 Subject: [PATCH 0382/1861] ubifs: fix use-after-free on symlink traversal free the symlink body after the same RCU delay we have for freeing the struct inode itself, so that traversal during RCU pathwalk wouldn't step into freed memory. 
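The jffs2 and ubifs changes (and the debugfs one that follows) are
instances of one pattern: move the kfree() of the symlink body into the
RCU callback that also frees the inode, so it is only released after a
full grace period. A generic sketch of the pattern (the foo_* names are
placeholders, not any real filesystem):

	static void foo_i_callback(struct rcu_head *head)
	{
		struct inode *inode = container_of(head, struct inode, i_rcu);

		/* runs only after an RCU grace period, so no RCU pathwalk
		 * can still be dereferencing the symlink body here */
		kfree(FOO_I(inode)->symlink_body);
		kmem_cache_free(foo_inode_cachep, FOO_I(inode));
	}

	static void foo_destroy_inode(struct inode *inode)
	{
		call_rcu(&inode->i_rcu, foo_i_callback);
	}

Freeing the body synchronously, before the grace period, is the bug
being fixed: a lockless ->get_link() in RCU-walk mode may still hold a
pointer to it.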
Signed-off-by: Al Viro --- fs/ubifs/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 8dc2818fdd849..12628184772c0 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -276,14 +276,12 @@ static void ubifs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct ubifs_inode *ui = ubifs_inode(inode); + kfree(ui->data); kmem_cache_free(ubifs_inode_slab, ui); } static void ubifs_destroy_inode(struct inode *inode) { - struct ubifs_inode *ui = ubifs_inode(inode); - - kfree(ui->data); call_rcu(&inode->i_rcu, ubifs_i_callback); } -- GitLab From 93b919da64c15b90953f96a536e5e61df896ca57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:43:37 +0000 Subject: [PATCH 0383/1861] debugfs: fix use-after-free on symlink traversal symlink body shouldn't be freed without an RCU delay. Switch debugfs to ->destroy_inode() and use of call_rcu(); free both the inode and symlink body in the callback. Similar to solution for bpf, only here it's even more obvious that ->evict_inode() can be dropped. Signed-off-by: Al Viro --- fs/debugfs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 95b5e78c22b1e..f25daa207421c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -163,19 +163,24 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void debugfs_evict_inode(struct inode *inode) +static void debugfs_i_callback(struct rcu_head *head) { - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); + struct inode *inode = container_of(head, struct inode, i_rcu); if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); + free_inode_nonrcu(inode); +} + +static void debugfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, debugfs_i_callback); } static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, .remount_fs = debugfs_remount, .show_options = debugfs_show_options, - .evict_inode = debugfs_evict_inode, + .destroy_inode = debugfs_destroy_inode, }; static void debugfs_release_dentry(struct dentry *dentry) -- GitLab From c63adb28f6d913310430f14c69f0a2ea55eed0cc Mon Sep 17 00:00:00 2001 From: Annaliese McDermond Date: Sat, 30 Mar 2019 09:02:02 -0700 Subject: [PATCH 0384/1861] ASoC: tlv320aic32x4: Fix Common Pins The common pins were mistakenly not added to the DAPM graph. Adding these pins will allow valid graphs to be created. Signed-off-by: Annaliese McDermond Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 96f1526cb258a..5520044929f42 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -490,6 +490,8 @@ static const struct snd_soc_dapm_widget aic32x4_dapm_widgets[] = { SND_SOC_DAPM_INPUT("IN2_R"), SND_SOC_DAPM_INPUT("IN3_L"), SND_SOC_DAPM_INPUT("IN3_R"), + SND_SOC_DAPM_INPUT("CM_L"), + SND_SOC_DAPM_INPUT("CM_R"), }; static const struct snd_soc_dapm_route aic32x4_dapm_routes[] = { -- GitLab From 817b4d64da036f5559297a2fdb82b8b14f4ffdcd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 18 Mar 2019 23:00:54 +0300 Subject: [PATCH 0385/1861] ACPI / utils: Introduce acpi_dev_get_first_match_dev() helper The acpi_dev_get_first_match_name() is missing put_device() call and thus keeping reference counting unbalanced. 
In order to fix the issue introduce a new helper to convert existing users one-by-one to a better API. Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Reviewed-by: Mika Westerberg Acked-by: Mark Brown Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 24 ++++++++++++++++++++++-- include/acpi/acpi_bus.h | 3 +++ include/linux/acpi.h | 6 ++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index c4b06cc075f93..5a2bae2b6c3aa 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -739,6 +739,7 @@ EXPORT_SYMBOL(acpi_dev_found); struct acpi_dev_match_info { const char *dev_name; + struct acpi_device *adev; struct acpi_device_id hid[2]; const char *uid; s64 hrv; @@ -759,6 +760,7 @@ static int acpi_dev_match_cb(struct device *dev, void *data) return 0; match->dev_name = acpi_dev_name(adev); + match->adev = adev; if (match->hrv == -1) return 1; @@ -806,16 +808,34 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) EXPORT_SYMBOL(acpi_dev_present); /** - * acpi_dev_get_first_match_name - Return name of first match of ACPI device + * acpi_dev_get_first_match_dev - Return the first match of ACPI device * @hid: Hardware ID of the device. * @uid: Unique ID of the device, pass NULL to not check _UID * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * - * Return device name if a matching device was present + * Return the first match of ACPI device if a matching device was present * at the moment of invocation, or NULL otherwise. * + * The caller is responsible to call put_device() on the returned device. + * * See additional information in acpi_dev_present() as well. */ +struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) +{ + struct acpi_dev_match_info match = {}; + struct device *dev; + + strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); + match.uid = uid; + match.hrv = hrv; + + dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); + return dev ? 
match.adev : NULL; +} +EXPORT_SYMBOL(acpi_dev_get_first_match_dev); + +/* DEPRECATED, use acpi_dev_get_first_match_dev() instead */ const char * acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0300374101cde..2063e9e2f3845 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -91,6 +91,9 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, bool acpi_dev_found(const char *hid); bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); +struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); + const char * acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d5dcebd7aad33..3e1d16b00513f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -669,6 +669,12 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) return false; } +static inline struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) +{ + return NULL; +} + static inline const char * acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) { -- GitLab From 0cf064db948aca1e760fc513594536f761dee1cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:21 +0200 Subject: [PATCH 0386/1861] extcon: axp288: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Chanwoo Choi Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/extcon/extcon-axp288.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index a983708b77a66..50f9402fb3253 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -333,7 +333,7 @@ static int axp288_extcon_probe(struct platform_device *pdev) struct axp288_extcon_info *info; struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent); struct device *dev = &pdev->dev; - const char *name; + struct acpi_device *adev; int ret, i, pirq; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); @@ -357,9 +357,10 @@ static int axp288_extcon_probe(struct platform_device *pdev) if (ret) return ret; - name = acpi_dev_get_first_match_name("INT3496", NULL, -1); - if (name) { - info->id_extcon = extcon_get_extcon_dev(name); + adev = acpi_dev_get_first_match_dev("INT3496", NULL, -1); + if (adev) { + info->id_extcon = extcon_get_extcon_dev(acpi_dev_name(adev)); + put_device(&adev->dev); if (!info->id_extcon) return -EPROBE_DEFER; -- GitLab From d00d2109c3679bf87d412b1667bcb6d42c1ac12f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:22 +0200 Subject: [PATCH 0387/1861] gpio: merrifield: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- drivers/gpio/gpio-merrifield.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 7c659fdaa6d58..2383dc78b1235 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -377,10 +377,20 @@ static void mrfld_irq_init_hw(struct mrfld_gpio *priv) } } -static const char *mrfld_gpio_get_pinctrl_dev_name(void) +static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv) { - const char *dev_name = acpi_dev_get_first_match_name("INTC1002", NULL, -1); - return dev_name ? dev_name : "pinctrl-merrifield"; + struct acpi_device *adev; + const char *name; + + adev = acpi_dev_get_first_match_dev("INTC1002", NULL, -1); + if (adev) { + name = devm_kstrdup(priv->dev, acpi_dev_name(adev), GFP_KERNEL); + put_device(&adev->dev); + } else { + name = "pinctrl-merrifield"; + } + + return name; } static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -441,7 +451,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id return retval; } - pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(); + pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(priv); for (i = 0; i < ARRAY_SIZE(mrfld_gpio_ranges); i++) { range = &mrfld_gpio_ranges[i]; retval = gpiochip_add_pin_range(&priv->chip, -- GitLab From 1b55f1c6fd64efd7b1339664edc1222ad99f9c9b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:23 +0200 Subject: [PATCH 0388/1861] ASoC: Intel: bytcht_da7213: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/bytcht_da7213.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c index b8e884803777b..4decba3381565 100644 --- a/sound/soc/intel/boards/bytcht_da7213.c +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -226,7 +226,7 @@ static int bytcht_da7213_probe(struct platform_device *pdev) struct snd_soc_card *card; struct snd_soc_acpi_mach *mach; const char *platform_name; - const char *i2c_name = NULL; + struct acpi_device *adev; int dai_index = 0; int ret_val = 0; int i; @@ -244,10 +244,11 @@ static int bytcht_da7213_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(codec_name, sizeof(codec_name), - "%s%s", "i2c-", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); dailink[dai_index].codec_name = codec_name; } -- GitLab From 645056da677082811b978f6285bf1ec0be947340 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:24 +0200 Subject: [PATCH 0389/1861] ASoC: Intel: bytcht_es8316: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- sound/soc/intel/boards/bytcht_es8316.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index d2a7e6ba11aec..6937c00cf63d1 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -442,7 +442,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct snd_soc_acpi_mach *mach; const char *platform_name; - const char *i2c_name = NULL; + struct acpi_device *adev; struct device *codec_dev; int dai_index = 0; int i; @@ -463,10 +463,11 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(codec_name, sizeof(codec_name), - "%s%s", "i2c-", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); byt_cht_es8316_dais[dai_index].codec_name = codec_name; } -- GitLab From a320d89e67d6a08af18603b538021087a41bb182 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:25 +0200 Subject: [PATCH 0390/1861] ASoC: Intel: bytcr_rt5640: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/bytcr_rt5640.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 940eb27158da7..f9175cf6747eb 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1154,7 +1154,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) struct byt_rt5640_private *priv; struct snd_soc_acpi_mach *mach; const char *platform_name; - const char *i2c_name = NULL; + struct acpi_device *adev; int ret_val = 0; int dai_index = 0; int i; @@ -1178,11 +1178,11 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name), - "%s%s", "i2c-", i2c_name); - + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); byt_rt5640_dais[dai_index].codec_name = byt_rt5640_codec_name; } -- GitLab From 7075e9babb5db302907cf32b1db688eb83c85f77 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:26 +0200 Subject: [PATCH 0391/1861] ASoC: Intel: bytcr_rt5651: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- sound/soc/intel/boards/bytcr_rt5651.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index b0a4d297176e0..b744add01d120 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -867,8 +867,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) struct byt_rt5651_private *priv; struct snd_soc_acpi_mach *mach; const char *platform_name; + struct acpi_device *adev; struct device *codec_dev; - const char *i2c_name = NULL; const char *hp_swapped; bool is_bytcr = false; int ret_val = 0; @@ -894,14 +894,16 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (!i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { + snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); + byt_rt5651_dais[dai_index].codec_name = byt_rt5651_codec_name; + } else { dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); return -ENODEV; } - snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), - "%s%s", "i2c-", i2c_name); - byt_rt5651_dais[dai_index].codec_name = byt_rt5651_codec_name; codec_dev = bus_find_device_by_name(&i2c_bus_type, NULL, byt_rt5651_codec_name); -- GitLab From fe4c283a79db91ac0d4402a363024789275c3fd1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:27 +0200 Subject: [PATCH 0392/1861] ASoC: Intel: cht_bsw_rt5645: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/cht_bsw_rt5645.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index cbc2d458483f3..32dbeaf1ab940 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -532,7 +532,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) struct snd_soc_acpi_mach *mach; const char *platform_name; struct cht_mc_private *drv; - const char *i2c_name = NULL; + struct acpi_device *adev; bool found = false; bool is_bytcr = false; int dai_index = 0; @@ -573,10 +573,11 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(cht_rt5645_codec_name, sizeof(cht_rt5645_codec_name), - "%s%s", "i2c-", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); cht_dailink[dai_index].codec_name = cht_rt5645_codec_name; } -- GitLab From b664e6fe2225972e0eb3df0bb8dbedd1a0fc641f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:28 +0200 Subject: [PATCH 0393/1861] ASoC: Intel: cht_bsw_rt5672: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. 
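Each of these conversions follows the same recipe; a condensed sketch of
the pattern in one place (schematic only, variable names vary per
driver):

	struct acpi_device *adev;

	adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1);
	if (adev) {
		snprintf(codec_name, sizeof(codec_name),
			 "i2c-%s", acpi_dev_name(adev));
		/* drop the reference bus_find_device() took; the old
		 * name-returning helper had no way to do this */
		put_device(&adev->dev);
	}

Unlike acpi_dev_get_first_match_name(), the new helper hands back the
struct acpi_device itself, so the caller can copy what it needs and then
release the reference.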
Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/cht_bsw_rt5672.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c index 3d5a2b3a06f08..0f7770822388e 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5672.c +++ b/sound/soc/intel/boards/cht_bsw_rt5672.c @@ -401,7 +401,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) struct cht_mc_private *drv; struct snd_soc_acpi_mach *mach = pdev->dev.platform_data; const char *platform_name; - const char *i2c_name; + struct acpi_device *adev; int i; drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL); @@ -411,10 +411,11 @@ static int snd_cht_mc_probe(struct platform_device *pdev) strcpy(drv->codec_name, RT5672_I2C_DEFAULT); /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(drv->codec_name, sizeof(drv->codec_name), - "i2c-%s", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) { if (!strcmp(cht_dailink[i].codec_name, RT5672_I2C_DEFAULT)) { -- GitLab From 257f9053c0204ea47491aa236004fd1226f75fa8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:29 +0200 Subject: [PATCH 0394/1861] ACPI / utils: Remove deprecated function since no user left There is no more user of acpi_dev_get_first_match_name(), which is deprecated and has no user left, so, remove it for good. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 16 ---------------- include/acpi/acpi_bus.h | 3 --- include/linux/acpi.h | 6 ------ 3 files changed, 25 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 5a2bae2b6c3aa..89363b245489a 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -835,22 +835,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) } EXPORT_SYMBOL(acpi_dev_get_first_match_dev); -/* DEPRECATED, use acpi_dev_get_first_match_dev() instead */ -const char * -acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) -{ - struct acpi_dev_match_info match = {}; - struct device *dev; - - strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); - match.uid = uid; - match.hrv = hrv; - - dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); - return dev ? match.dev_name : NULL; -} -EXPORT_SYMBOL(acpi_dev_get_first_match_name); - /* * acpi_backlight= handling, this is done here rather then in video_detect.c * because __setup cannot be used in modules. 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2063e9e2f3845..f7981751ac776 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -94,9 +94,6 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); -const char * -acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv); - #ifdef CONFIG_ACPI #include diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3e1d16b00513f..392413075cc0f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -675,12 +675,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) return NULL; } -static inline const char * -acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) -{ - return NULL; -} - static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; -- GitLab From aefa763b18a220f5fc1d5ab02af09158b6cc36ea Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 1 Apr 2019 09:24:39 +0800 Subject: [PATCH 0395/1861] ACPI: video: Use vendor backlight on Sony VPCEH3U1E On Sony Vaio VPCEH3U1E, ACPI backlight control does not work, and native backlight works. Thus, force the use of vendor backlight control on this system. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202401 Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 43587ac680e47..0e0a3929e34e2 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -141,6 +141,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"), }, }, + { + .callback = video_detect_force_vendor, + .ident = "Sony VPCEH3U1E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VPCEH3U1E"), + }, + }, /* * These models have a working acpi_video backlight control, and using -- GitLab From fd427103e8dfcb4b438269afd710b63e7af61463 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Mar 2019 08:43:33 +0000 Subject: [PATCH 0396/1861] powerpc/32: Fix early boot failure with RTAS built-in Commit 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") changes the code to use a field in thread struct to store the stack pointer while in RTAS instead of using SPRN_SPRG2. It therefore converts all places which were manipulating SPRN_SPRG2 to use that field. During early startup, the zeroing of SPRN_SPRG2 has been replaced by a zeroing of that field in thread struct. But at least in start_here, that's done wrongly, because it uses the physical address of the field while the MMU is already on at that time. The virtual address of the field should be used instead; by that point, though, the thread struct has already been zeroed and initialised, so we can simply drop this initialisation. 
Reported-by: Larry Finger Fixes: 0df977eafc79 ("powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS") Signed-off-by: Christophe Leroy Tested-by: Larry Finger Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_32.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 48051c8977c56..e25b615e9f9e6 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -851,10 +851,6 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 -#ifdef CONFIG_PPC_RTAS - li r3,0 - stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ -#endif lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l mtspr SPRN_SPRG_PGDIR, r4 @@ -941,10 +937,6 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 -#ifdef CONFIG_PPC_RTAS - li r3,0 - stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ -#endif lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l mtspr SPRN_SPRG_PGDIR, r4 -- GitLab From 42b1bd33dcdef4ffd98f695e188bab82f9fa46d8 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 29 Mar 2019 17:01:18 +0300 Subject: [PATCH 0397/1861] block/bfq: fix ifdef for CONFIG_BFQ_GROUP_IOSCHED=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace BFQ_GROUP_IOSCHED_ENABLED with CONFIG_BFQ_GROUP_IOSCHED. Code under these ifdefs never worked; something might be broken. Fixes: 0471559c2fbd ("block, bfq: add/remove entity weights correctly") Fixes: 73d58118498b ("block, bfq: consider also ioprio classes in symmetry detection") Reviewed-by: Holger Hoffstätte Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- block/bfq-wf2q.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 4c592496a16a2..fac188dd78fa7 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -674,7 +674,7 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * at least two nodes. */ return !(varied_queue_weights || multiple_classes_busy -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED || bfqd->num_groups_with_pending_reqs > 0 #endif ); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 63311d1ff1edf..a11bef75483d4 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1012,7 +1012,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, entity->on_st = true; } -#ifdef BFQ_GROUP_IOSCHED_ENABLED +#ifdef CONFIG_BFQ_GROUP_IOSCHED if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); -- GitLab From 74e7c6c877f620d65a8269692d089bbd066f626c Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 29 Mar 2019 14:13:23 +0800 Subject: [PATCH 0398/1861] HID: i2c-hid: Disable runtime PM on Synaptics touchpad We have a new Dell laptop which has the Synaptics I2C touchpad (06cb:7e7e) on it. After booting Linux, the touchpad doesn't work: there is no interrupt when touching the touchpad. After disabling runtime PM, everything works well. I also tried the I2C_HID_QUIRK_DELAY_AFTER_SLEEP quirk; things improve with that quirk applied, in that interrupts do arrive, but the data they report is invalid. 
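The quirk table extended below maps vendor/product ID pairs to flag bits that the driver applies at probe time. The lookup helper in i2c-hid-core.c has roughly the following shape (a simplified sketch for illustration, not the exact code):

	static u32 i2c_hid_lookup_quirk(const u16 idVendor, const u16 idProduct)
	{
		u32 quirks = 0;
		int n;

		/* last match wins; HID_ANY_ID acts as a product-ID wildcard */
		for (n = 0; i2c_hid_quirks[n].idVendor; n++)
			if (i2c_hid_quirks[n].idVendor == idVendor &&
			    (i2c_hid_quirks[n].idProduct == (u16)HID_ANY_ID ||
			     i2c_hid_quirks[n].idProduct == idProduct))
				quirks = i2c_hid_quirks[n].quirks;

		return quirks;
	}

With the new entry, devices matching 06cb:7e7e get I2C_HID_QUIRK_NO_RUNTIME_PM, so runtime PM is never enabled for this touchpad.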
Signed-off-by: Hui Wang Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/i2c-hid/i2c-hid-core.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b6d93f4ad037e..adce58f24f763 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1083,6 +1083,7 @@ #define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3 #define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 +#define I2C_DEVICE_ID_SYNAPTICS_7E7E 0x7e7e #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 #define USB_DEVICE_ID_TEXAS_INSTRUMENTS_LENOVO_YOGA 0x0855 diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 90164fed08d35..4d1f24ee249c4 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -184,6 +184,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_NO_RUNTIME_PM }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, I2C_HID_QUIRK_BOGUS_IRQ }, + { USB_VENDOR_ID_SYNAPTICS, I2C_DEVICE_ID_SYNAPTICS_7E7E, + I2C_HID_QUIRK_NO_RUNTIME_PM }, { 0, 0 } }; -- GitLab From 344bf332ceb2364a2fcd3ab4133dce5ea35c2594 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sat, 30 Mar 2019 21:13:46 +0800 Subject: [PATCH 0399/1861] arm64: mm: fix incorrect assignment of 'max_mapnr' Although we don't actually make use of the 'max_mapnr' global variable, we do set it to a junk value for !CONFIG_FLATMEM configurations that leave mem_map uninitialised. To avoid somebody tripping over this in future, set 'max_mapnr' using max_pfn, which is calculated directly from the memblock information. Reviewed-by: Catalin Marinas Signed-off-by: Muchun Song Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6bc135042f5e4..c29b17b520cd7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -535,7 +535,7 @@ void __init mem_init(void) else swiotlb_force = SWIOTLB_NO_FORCE; - set_max_mapnr(pfn_to_page(max_pfn) - mem_map); + set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); #ifndef CONFIG_SPARSEMEM_VMEMMAP free_unused_memmap(); -- GitLab From faa3604eda325588451c9c1eb4c8a8d04c1cd633 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 30 Mar 2019 05:50:38 +0800 Subject: [PATCH 0400/1861] x86/resctrl: Fix typos in the mba_sc mount option The user can control the MBA memory bandwidth in MBps (Mega Bytes per second) units of the MBA Software Controller (mba_sc) by using the "mba_MBps" mount option. For details, see Documentation/x86/resctrl_ui.txt. However, commit 23bf1b6be9c2 ("kernfs, sysfs, cgroup, intel_rdt: Support fs_context") changed the mount option name from "mba_MBps" to "mba_mpbs" by mistake. Change it back to "mba_MBps" because it is user-visible, and correct the "Opt_mba_mpbs" spelling to "Opt_mba_mbps". [ bp: massage commit message. ] Fixes: 23bf1b6be9c2 ("kernfs, sysfs, cgroup, intel_rdt: Support fs_context") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Cc: dhowells@redhat.com Cc: Fenghua Yu Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Reinette Chatre Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1553896238-22130-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 399601eda8e43..54b9eef3eea97 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2039,14 +2039,14 @@ out: enum rdt_param { Opt_cdp, Opt_cdpl2, - Opt_mba_mpbs, + Opt_mba_mbps, nr__rdt_params }; static const struct fs_parameter_spec rdt_param_specs[] = { fsparam_flag("cdp", Opt_cdp), fsparam_flag("cdpl2", Opt_cdpl2), - fsparam_flag("mba_mpbs", Opt_mba_mpbs), + fsparam_flag("mba_MBps", Opt_mba_mbps), {} }; @@ -2072,7 +2072,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_cdpl2: ctx->enable_cdpl2 = true; return 0; - case Opt_mba_mpbs: + case Opt_mba_mbps: if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -EINVAL; ctx->enable_mba_mbps = true; -- GitLab From b506bc975f60f06e13e74adb35e708a23dc4e87c Mon Sep 17 00:00:00 2001 From: Dust Li Date: Mon, 1 Apr 2019 16:04:53 +0800 Subject: [PATCH 0401/1861] tcp: fix a potential NULL pointer dereference in tcp_sk_exit When tcp_sk_init() fails in inet_ctl_sock_create(), 'net->ipv4.tcp_congestion_control' will be left uninitialized, but tcp_sk_exit() doesn't check for that. This patch adds a check on 'net->ipv4.tcp_congestion_control' in tcp_sk_exit() to prevent a NULL pointer dereference. Fixes: 6670e1524477 ("tcp: Namespace-ify sysctl_tcp_default_congestion_control") Signed-off-by: Dust Li Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 277d71239d755..2f8039a26b08f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2578,7 +2578,8 @@ static void __net_exit tcp_sk_exit(struct net *net) { int cpu; - module_put(net->ipv4.tcp_congestion_control->owner); + if (net->ipv4.tcp_congestion_control) + module_put(net->ipv4.tcp_congestion_control->owner); for_each_possible_cpu(cpu) inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); -- GitLab From a145b5b0e48783d0cd3ee605ed00b133d5c8ffed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Mar 2019 16:51:52 +0000 Subject: [PATCH 0402/1861] drm/i915: Always backoff after a drm_modeset_lock() deadlock If drm_modeset_lock() reports a deadlock, it sets the ctx->contended field and insists that the caller call drm_modeset_backoff(); otherwise it generates a WARN on cleanup. 
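The locking discipline the fix below restores is the standard DRM acquire-context retry loop, roughly as follows (a sketch with error paths trimmed; the WARN quoted next fires when a caller bails out without the backoff step):

	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
retry:
	ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx);
	if (ret == -EDEADLK) {
		if (!drm_modeset_backoff(&ctx))	/* drops the locks held so far */
			goto retry;
	}
	/* ... work under the lock ... */
	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);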
<4> [1601.870376] WARNING: CPU: 3 PID: 8445 at drivers/gpu/drm/drm_modeset_lock.c:228 drm_modeset_drop_locks+0x35/0x40 <4> [1601.870395] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal i915 coretemp crct10dif_pclmul <6> [1601.870403] Console: switching <4> [1601.870403] snd_hda_intel <4> [1601.870406] to colour frame buffer device 320x90 <4> [1601.870406] crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel e1000e snd_hda_core cdc_ether ptp usbnet mii pps_core snd_pcm i2c_i801 mei_me mei prime_numbers <4> [1601.870422] CPU: 3 PID: 8445 Comm: cat Tainted: G U 5.0.0-rc7-CI-CI_DRM_5650+ #1 <4> [1601.870424] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.2402.AD3.1810170014 10/17/2018 <4> [1601.870427] RIP: 0010:drm_modeset_drop_locks+0x35/0x40 <4> [1601.870430] Code: 29 48 8b 43 60 48 8d 6b 60 48 39 c5 74 19 48 8b 43 60 48 8d b8 70 ff ff ff e8 87 ff ff ff 48 8b 43 60 48 39 c5 75 e7 5b 5d c3 <0f> 0b eb d3 0f 1f 80 00 00 00 00 41 56 41 55 41 54 55 53 48 8b 6f <4> [1601.870432] RSP: 0018:ffffc90000d67ce8 EFLAGS: 00010282 <4> [1601.870435] RAX: 00000000ffffffdd RBX: ffffc90000d67d00 RCX: 5dbbe23d00000000 <4> [1601.870437] RDX: 0000000000000000 RSI: 0000000093e6194a RDI: ffffc90000d67d00 <4> [1601.870439] RBP: ffff88849e62e678 R08: 0000000003b7329a R09: 0000000000000001 <4> [1601.870441] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888492100410 <4> [1601.870442] R13: ffff88849ea50958 R14: ffff8884a67eb028 R15: ffff8884a67eb028 <4> [1601.870445] FS: 00007fa7a27745c0(0000) GS:ffff8884aff80000(0000) knlGS:0000000000000000 <4> [1601.870447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [1601.870449] CR2: 000055af07e66000 CR3: 00000004a8cc2006 CR4: 0000000000760ee0 <4> [1601.870451] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4> [1601.870453] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 <4> [1601.870454] PKRU: 55555554 <4> [1601.870456] Call Trace: <4> [1601.870505] i915_dsc_fec_support_show+0x91/0x190 [i915] <4> [1601.870522] seq_read+0xdb/0x3c0 <4> [1601.870531] full_proxy_read+0x51/0x80 <4> [1601.870538] __vfs_read+0x31/0x190 <4> [1601.870546] ? 
__se_sys_newfstat+0x3c/0x60 <4> [1601.870552] vfs_read+0x9e/0x150 <4> [1601.870557] ksys_read+0x50/0xc0 <4> [1601.870564] do_syscall_64+0x55/0x190 <4> [1601.870569] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4> [1601.870572] RIP: 0033:0x7fa7a226d081 <4> [1601.870574] Code: fe ff ff 48 8d 3d 67 9c 0a 00 48 83 ec 08 e8 a6 4c 02 00 66 0f 1f 44 00 00 48 8d 05 81 08 2e 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53 <4> [1601.870576] RSP: 002b:00007ffcc05140c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 <4> [1601.870579] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fa7a226d081 <4> [1601.870581] RDX: 0000000000020000 RSI: 000055af07e63000 RDI: 0000000000000007 <4> [1601.870583] RBP: 0000000000020000 R08: 000000000000007b R09: 0000000000000000 <4> [1601.870585] R10: 000055af07e60010 R11: 0000000000000246 R12: 000055af07e63000 <4> [1601.870587] R13: 0000000000000007 R14: 000055af07e634bf R15: 0000000000020000 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109745 Fixes: e845f099f1c6 ("drm/i915/dsc: Add Per connector debugfs node for DSC support/enable") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Ville Syrjala Cc: Anusha Srivatsa Cc: Lyude Paul Cc: Manasi Navare Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190329165152.29259-1-chris@chris-wilson.co.uk (cherry picked from commit ee6df5694a9a2e30566ae05e9c145a0f6d5e087f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0bd890c04fe4f..f6f6e5b78e978 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4830,7 +4830,10 @@ static int i915_dsc_fec_support_show(struct seq_file *m, void *data) ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); if (ret) { - ret = -EINTR; + if (ret == -EDEADLK && !drm_modeset_backoff(&ctx)) { + try_again = true; + continue; + } break; } crtc = connector->state->crtc; -- GitLab From 8c1074f690bca6c6c8d79fc4a5752635cd0bfbe0 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Mon, 1 Apr 2019 13:24:00 +0100 Subject: [PATCH 0403/1861] MAINTAINERS: net: update Solarflare maintainers Cc: Martin Habets Signed-off-by: Bert Kenward Acked-by: Martin Habets Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1fdc8970e8161..ade9e18488d23 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13982,7 +13982,7 @@ F: drivers/media/rc/serial_ir.c SFC NETWORK DRIVER M: Solarflare linux maintainers M: Edward Cree -M: Bert Kenward +M: Martin Habets L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ -- GitLab From 8c83f2df9c6578ea4c5b940d8238ad8a41b87e9e Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Mon, 1 Apr 2019 09:17:32 -0400 Subject: [PATCH 0404/1861] vrf: check accept_source_route on the original netdevice Configuration check to accept source route IP options should be made on the incoming netdevice when the skb->dev is an l3mdev master. The route lookup for the source route next hop also needs the incoming netdev. v2->v3: - Simplify by passing the original netdevice down the stack (per David Ahern). Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/net/ip.h | 2 +- net/ipv4/ip_input.c | 7 +++---- net/ipv4/ip_options.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index be3cad9c2e4c3..583526aad1d0a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -677,7 +677,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen); void ip_options_undo(struct ip_options *opt); void ip_forward_options(struct sk_buff *skb); -int ip_options_rcv_srr(struct sk_buff *skb); +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); /* * Functions provided by ip_sockglue.c diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index ecce2dc78f17e..1132d6d1796a4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } -static inline bool ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt; const struct iphdr *iph; - struct net_device *dev = skb->dev; /* It looks as overkill, because not all IP options require packet mangling. @@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) } } - if (ip_options_rcv_srr(skb)) + if (ip_options_rcv_srr(skb, dev)) goto drop; } @@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, } #endif - if (iph->ihl > 5 && ip_rcv_options(skb)) + if (iph->ihl > 5 && ip_rcv_options(skb, dev)) goto drop; rt = skb_rtable(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 32a35043c9f59..3db31bb9df506 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb) } } -int ip_options_rcv_srr(struct sk_buff *skb) +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; @@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); -- GitLab From b83f28e1e38a8324eaa5e55f2c7ee2f75e748f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 12 Feb 2019 09:52:04 +0100 Subject: [PATCH 0405/1861] i40e: move i40e_xsk_umem function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i40e_xsk_umem function was explicitly inlined in i40e.h. There is no reason for that, so move it to i40e_main.c instead. 
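For context, the helper is consumed from the ring-configuration paths, roughly as below (a hedged sketch; the xsk_umem ring field belongs to the existing i40e structures of this era and is not something added by this patch):

	/* during Rx/Tx ring setup in i40e_main.c */
	ring->xsk_umem = i40e_xsk_umem(ring);
	if (ring->xsk_umem) {
		/* AF_XDP zero-copy: buffers come from the UMEM */
	} else {
		/* regular page-based ring buffers */
	}

Since those callers all live in i40e_main.c, there is no need to keep the function as an inline in the shared header.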
Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 14 -------------- drivers/net/ethernet/intel/i40e/i40e_main.c | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d684998ba2b03..cc583ad5236b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1096,20 +1096,6 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) return !!vsi->xdp_prog; } -static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) -{ - bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); - int qid = ring->queue_index; - - if (ring_is_xdp(ring)) - qid -= ring->vsi->alloc_queue_pairs; - - if (!xdp_on) - return NULL; - - return xdp_get_umem_from_qid(ring->vsi->netdev, qid); -} - int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index da62218eb70ad..dd77793a08a0d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3063,6 +3063,26 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) ring->queue_index); } +/** + * i40e_xsk_umem - Retrieve the AF_XDP ZC if XDP and ZC is enabled + * @ring: The Tx or Rx ring + * + * Returns the UMEM or NULL. + **/ +static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) +{ + bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); + int qid = ring->queue_index; + + if (ring_is_xdp(ring)) + qid -= ring->vsi->alloc_queue_pairs; + + if (!xdp_on) + return NULL; + + return xdp_get_umem_from_qid(ring->vsi->netdev, qid); +} + /** * i40e_configure_tx_ring - Configure a transmit ring context and rest * @ring: The Tx ring to configure -- GitLab From 2b64b2ed277ff23e785fbdb65098ee7e1252d64f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 22 Mar 2019 14:18:21 -0300 Subject: [PATCH 0406/1861] perf trace: Add 'string' event alias to select syscalls with string args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used in conjunction with the change to augmented_raw_syscalls.c in the next cset that adds all syscalls with a first or second arg string. With just what we have in the syscall tracepoints we get: # perf trace -e string ls > /dev/null ? ( ): ls/22382 ... 
[continued]: execve()) = 0 0.043 ( 0.004 ms): ls/22382 access(filename: 0x51ad420, mode: R) = -1 ENOENT (No such file or directory) 0.051 ( 0.004 ms): ls/22382 openat(dfd: CWD, filename: 0x51aa8b3, flags: RDONLY|CLOEXEC) = 3 0.071 ( 0.004 ms): ls/22382 openat(dfd: CWD, filename: 0x51b4d00, flags: RDONLY|CLOEXEC) = 3 0.138 ( 0.009 ms): ls/22382 openat(dfd: CWD, filename: 0x51684d0, flags: RDONLY|CLOEXEC) = 3 0.192 ( 0.004 ms): ls/22382 openat(dfd: CWD, filename: 0x51689c0, flags: RDONLY|CLOEXEC) = 3 0.255 ( 0.004 ms): ls/22382 openat(dfd: CWD, filename: 0x5168eb0, flags: RDONLY|CLOEXEC) = 3 0.342 ( 0.003 ms): ls/22382 openat(dfd: CWD, filename: 0x51693a0, flags: RDONLY|CLOEXEC) = 3 0.380 ( 0.003 ms): ls/22382 openat(dfd: CWD, filename: 0x5169950, flags: RDONLY|CLOEXEC) = 3 0.670 ( 0.011 ms): ls/22382 statfs(pathname: 0x515c783, buf: 0x7fff54d75b70) = 0 0.683 ( 0.005 ms): ls/22382 statfs(pathname: 0x515c783, buf: 0x7fff54d75a60) = 0 0.725 ( 0.004 ms): ls/22382 access(filename: 0x515c7ab) = 0 0.744 ( 0.005 ms): ls/22382 openat(dfd: CWD, filename: 0x50fba20, flags: RDONLY|CLOEXEC) = 3 0.793 ( 0.004 ms): ls/22382 openat(dfd: CWD, filename: 0x9e3e8390, flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 3 0.921 ( 0.006 ms): ls/22382 openat(dfd: CWD, filename: 0x50f7d90) = 3 # If we put the vfs_getname probe point in place: # perf probe 'vfs_getname=getname_flags:73 pathname=result->name:string' Added new events: probe:vfs_getname (on getname_flags:73 with pathname=result->name:string) probe:vfs_getname_1 (on getname_flags:73 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname_1 -aR sleep 1 # perf trace -e string ls > /dev/null ? ( ): ls/22440 ... [continued]: execve()) = 0 0.048 ( 0.008 ms): ls/22440 access(filename: /etc/ld.so.preload, mode: R) = -1 ENOENT (No such file or directory) 0.061 ( 0.007 ms): ls/22440 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: RDONLY|CLOEXEC) = 3 0.092 ( 0.008 ms): ls/22440 openat(dfd: CWD, filename: /lib64/libselinux.so.1, flags: RDONLY|CLOEXEC) = 3 0.165 ( 0.007 ms): ls/22440 openat(dfd: CWD, filename: /lib64/libcap.so.2, flags: RDONLY|CLOEXEC) = 3 0.216 ( 0.007 ms): ls/22440 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: RDONLY|CLOEXEC) = 3 0.282 ( 0.007 ms): ls/22440 openat(dfd: CWD, filename: /lib64/libpcre2-8.so.0, flags: RDONLY|CLOEXEC) = 3 0.340 ( 0.007 ms): ls/22440 openat(dfd: CWD, filename: /lib64/libdl.so.2, flags: RDONLY|CLOEXEC) = 3 0.383 ( 0.007 ms): ls/22440 openat(dfd: CWD, filename: /lib64/libpthread.so.0, flags: RDONLY|CLOEXEC) = 3 0.697 ( 0.021 ms): ls/22440 statfs(pathname: /sys/fs/selinux, buf: 0x7ffee7dc9010) = 0 0.720 ( 0.007 ms): ls/22440 statfs(pathname: /sys/fs/selinux, buf: 0x7ffee7dc8f00) = 0 0.757 ( 0.007 ms): ls/22440 access(filename: /etc/selinux/config) = 0 0.779 ( 0.009 ms): ls/22440 openat(dfd: CWD, filename: /usr/lib/locale/locale-archive, flags: RDONLY|CLOEXEC) = 3 0.830 ( 0.006 ms): ls/22440 openat(dfd: CWD, filename: ., flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 3 0.958 ( 0.010 ms): ls/22440 openat(dfd: CWD, filename: /usr/lib64/gconv/gconv-modules.cache) = 3 # Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-6fh1myvn7ulf4xwq9iz3o776@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/strace/groups/string | 65 +++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tools/perf/trace/strace/groups/string diff --git 
a/tools/perf/trace/strace/groups/string b/tools/perf/trace/strace/groups/string new file mode 100644 index 0000000000000..c87129a3e3c43 --- /dev/null +++ b/tools/perf/trace/strace/groups/string @@ -0,0 +1,65 @@ +access +acct +add_key +chdir +chmod +chown +chroot +creat +delete_module +execve +execveat +faccessat +fchmodat +fchownat +fgetxattr +finit_module +fremovexattr +fsetxattr +futimesat +getxattr +inotify_add_watch +lchown +lgetxattr +link +linkat +listxattr +llistxattr +lremovexattr +lsetxattr +lstat +memfd_create +mkdir +mkdirat +mknod +mknodat +mq_open +mq_timedsend +mq_unlink +name_to_handle_at +newfstatat +open +openat +pivot_root +pwrite64 +quotactl +readlink +readlinkat +removexattr +rename +renameat +renameat2 +request_key +rmdir +setxattr +stat +statfs +statx +swapoff +swapon +symlink +symlinkat +truncate +unlink +unlinkat +utimensat -- GitLab From c52a82f7796306d2d2aafadba47617da619b0918 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 22 Mar 2019 14:25:14 -0300 Subject: [PATCH 0407/1861] perf augmented_raw_syscalls: Copy strings from all syscalls with 1st or 2nd string arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gets the augmented_raw_syscalls a bit more useful as-is, add a comment stating that the intent is to have all this in a map populated by userspace via the 'syscalls' BPF map, that right now has only a flag stating if the syscall is filtered or not. With it: # grep -B1 augmented_raw ~/.perfconfig [trace] add_events = /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o # # perf trace -e string weechat/6001 stat("/etc/localtime", 0x7ffe22c23d10) = 0 gnome-shell/1943 openat(AT_FDCWD, "/proc/self/stat", O_RDONLY) = 81 weechat/6001 stat("/etc/localtime", 0x7ffe22c23d10) = 0 gmain/2475 inotify_add_watch(20, "/home/acme/.config/firewall", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/var/cache/app-info/yaml", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/var/lib/app-info/xmls", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/var/lib/app-info/yaml", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/usr/share/app-info/yaml", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/usr/local/share/app-info/xmls", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/usr/local/share/app-info/yaml", 16789454) = -1 ENOENT (No such file or directory) gmain/2391 inotify_add_watch(3, "/home/acme/.local/share/app-info/yaml", 16789454) = -1 ENOENT (No such file or directory) gmain/1121 inotify_add_watch(12, "/etc/NetworkManager/VPN", 16789454) = -1 ENOENT (No such file or directory) weechat/6001 stat("/etc/localtime", 0x7ffe22c23d10) = 0 gmain/2050 inotify_add_watch(8, "/home/acme/~", 16789454) = -1 ENOENT (No such file or directory) gmain/2521 inotify_add_watch(6, "/var/lib/fwupd/remotes.d/lvfs-testing", 16789454) = -1 ENOENT (No such file or directory) weechat/6001 stat("/etc/localtime", 0x7ffe22c23d10) = 0 DOM Worker/22714 ... 
[continued]: openat()) = 257 FS Broker 3982/3990 openat(AT_FDCWD, "/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY) = 187 DOMCacheThread/16652 mkdir("/home/acme/.mozilla/firefox/ina67tev.default/storage/default/https+++web.whatsapp.com/cache/morgue/192", S_IRUGO|S_IXUGO|S_IWUSR) = -1 EEXIST (File exists) ^C# Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-a1hxffoy8t43e0wq6bzhp23u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../examples/bpf/augmented_raw_syscalls.c | 150 +++++++++++++++++- 1 file changed, 147 insertions(+), 3 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index f9b2161e1ca49..9f8b31ad7a490 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -44,9 +44,78 @@ struct augmented_filename { char value[256]; }; -#define SYS_OPEN 2 -#define SYS_ACCESS 21 -#define SYS_OPENAT 257 +/* syscalls where the first arg is a string */ +#define SYS_OPEN 2 +#define SYS_STAT 4 +#define SYS_LSTAT 6 +#define SYS_ACCESS 21 +#define SYS_EXECVE 59 +#define SYS_TRUNCATE 76 +#define SYS_CHDIR 80 +#define SYS_RENAME 82 +#define SYS_MKDIR 83 +#define SYS_RMDIR 84 +#define SYS_CREAT 85 +#define SYS_LINK 86 +#define SYS_UNLINK 87 +#define SYS_SYMLINK 88 +#define SYS_READLINK 89 +#define SYS_CHMOD 90 +#define SYS_CHOWN 92 +#define SYS_LCHOWN 94 +#define SYS_MKNOD 133 +#define SYS_STATFS 137 +#define SYS_PIVOT_ROOT 155 +#define SYS_CHROOT 161 +#define SYS_ACCT 163 +#define SYS_SWAPON 167 +#define SYS_SWAPOFF 168 +#define SYS_DELETE_MODULE 176 +#define SYS_SETXATTR 188 +#define SYS_LSETXATTR 189 +#define SYS_GETXATTR 191 +#define SYS_LGETXATTR 192 +#define SYS_LISTXATTR 194 +#define SYS_LLISTXATTR 195 +#define SYS_REMOVEXATTR 197 +#define SYS_LREMOVEXATTR 198 +#define SYS_MQ_OPEN 240 +#define SYS_MQ_UNLINK 241 +#define SYS_ADD_KEY 248 +#define SYS_REQUEST_KEY 249 +#define SYS_SYMLINKAT 266 +#define SYS_MEMFD_CREATE 319 + +/* syscalls where the second arg is a string */ + +#define SYS_PWRITE64 18 +#define SYS_EXECVE 59 +#define SYS_RENAME 82 +#define SYS_QUOTACTL 179 +#define SYS_FSETXATTR 190 +#define SYS_FGETXATTR 193 +#define SYS_FREMOVEXATTR 199 +#define SYS_MQ_TIMEDSEND 242 +#define SYS_REQUEST_KEY 249 +#define SYS_INOTIFY_ADD_WATCH 254 +#define SYS_OPENAT 257 +#define SYS_MKDIRAT 258 +#define SYS_MKNODAT 259 +#define SYS_FCHOWNAT 260 +#define SYS_FUTIMESAT 261 +#define SYS_NEWFSTATAT 262 +#define SYS_UNLINKAT 263 +#define SYS_RENAMEAT 264 +#define SYS_LINKAT 265 +#define SYS_READLINKAT 267 +#define SYS_FCHMODAT 268 +#define SYS_FACCESSAT 269 +#define SYS_UTIMENSAT 280 +#define SYS_NAME_TO_HANDLE_AT 303 +#define SYS_FINIT_MODULE 313 +#define SYS_RENAMEAT2 316 +#define SYS_EXECVEAT 322 +#define SYS_STATX 332 pid_filter(pids_filtered); @@ -109,11 +178,86 @@ int sys_enter(struct syscall_enter_args *args) * * after the ctx memory access to prevent their down stream merging. */ + /* + * This table of what args are strings will be provided by userspace, + * in the syscalls map, i.e. we will already have to do the lookup to + * see if this specific syscall is filtered, so we can as well get more + * info about what syscall args are strings or pointers, and how many + * bytes to copy, per arg, etc. 
+ * + * For now hard code it, till we have all the basic mechanisms in place + * to automate everything and make the kernel part be completely driven + * by information obtained in userspace for each kernel version and + * processor architecture, making the kernel part the same no matter what + * kernel version or processor architecture it runs on. + */ switch (augmented_args.args.syscall_nr) { + case SYS_ACCT: + case SYS_ADD_KEY: + case SYS_CHDIR: + case SYS_CHMOD: + case SYS_CHOWN: + case SYS_CHROOT: + case SYS_CREAT: + case SYS_DELETE_MODULE: + case SYS_EXECVE: + case SYS_GETXATTR: + case SYS_LCHOWN: + case SYS_LGETXATTR: + case SYS_LINK: + case SYS_LISTXATTR: + case SYS_LLISTXATTR: + case SYS_LREMOVEXATTR: + case SYS_LSETXATTR: + case SYS_LSTAT: + case SYS_MEMFD_CREATE: + case SYS_MKDIR: + case SYS_MKNOD: + case SYS_MQ_OPEN: + case SYS_MQ_UNLINK: + case SYS_PIVOT_ROOT: + case SYS_READLINK: + case SYS_REMOVEXATTR: + case SYS_RENAME: + case SYS_REQUEST_KEY: + case SYS_RMDIR: + case SYS_SETXATTR: + case SYS_STAT: + case SYS_STATFS: + case SYS_SWAPOFF: + case SYS_SWAPON: + case SYS_SYMLINK: + case SYS_SYMLINKAT: + case SYS_TRUNCATE: + case SYS_UNLINK: case SYS_ACCESS: case SYS_OPEN: filename_arg = (const void *)args->args[0]; __asm__ __volatile__("": : :"memory"); break; + case SYS_EXECVEAT: + case SYS_FACCESSAT: + case SYS_FCHMODAT: + case SYS_FCHOWNAT: + case SYS_FGETXATTR: + case SYS_FINIT_MODULE: + case SYS_FREMOVEXATTR: + case SYS_FSETXATTR: + case SYS_FUTIMESAT: + case SYS_INOTIFY_ADD_WATCH: + case SYS_LINKAT: + case SYS_MKDIRAT: + case SYS_MKNODAT: + case SYS_MQ_TIMEDSEND: + case SYS_NAME_TO_HANDLE_AT: + case SYS_NEWFSTATAT: + case SYS_PWRITE64: + case SYS_QUOTACTL: + case SYS_READLINKAT: + case SYS_RENAMEAT: + case SYS_RENAMEAT2: + case SYS_STATX: + case SYS_UNLINKAT: + case SYS_UTIMENSAT: case SYS_OPENAT: filename_arg = (const void *)args->args[1]; break; } -- GitLab From 59f3bd7802d3ff7e6ddcce600f361bed288a97dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 22 Mar 2019 14:48:26 -0300 Subject: [PATCH 0408/1861] perf augmented_raw_syscalls: Use a PERCPU_ARRAY map to copy more string bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous method, copying to the BPF stack limited us in how many bytes we could copy from strings, use a PERCPU_ARRAY map like devised by the sysdig guys[1] to copy more bytes: Before: # trace --no-inherit -e openat touch `python -c "print "$s" 'a' * 2000"` touch: cannot touch 
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa': File name too long openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", O_CREAT|O_NOCTTY|O_NONBLOCK|O_WRONLY, S_IRUGO|S_IWUGO) = -1 ENAMETOOLONG (File name too long) openat(AT_FDCWD, "/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3 openat(AT_FDCWD, "/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) openat(AT_FDCWD, "/usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) # After: [root@quaco acme]# trace --no-inherit -e openat touch `python -c "print "$s" 'a' * 2000"` openat(AT_FDCWD, 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", O_CREAT|O_NOCTTY|O_NONBLOC) = -1 ENAMETOOLONG (File name too long) If we leave something like 'perf trace -e string' to trace all syscalls with a string, and then do some 'perf top', to get some annotation for the augmented_raw_syscalls.o BPF program we get: │ → callq *ffffffffc45576d1 ▒ │ augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, ▒ 0.05 │ mov %eax,0x40(%r13) Looking with pahole, expanding types, asking for hex offsets and sizes, and use of BTF type information to see what is at that 0x40 offset from %r13: # pahole -F btf -C augmented_args_filename --expand_types --hex /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o struct augmented_args_filename { struct syscall_enter_args { long long unsigned int common_tp_fields; /* 0 0x8 */ long int syscall_nr; /* 0x8 0x8 */ long unsigned int args[6]; /* 0x10 0x30 */ } args; /* 0 0x40 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct augmented_filename { unsigned int size; /* 0x40 0x4 */ int reserved; /* 0x44 0x4 */ char value[4096]; /* 0x48 0x1000 */ } filename; /* 0x40 0x1008 */ /* size: 4168, cachelines: 66, members: 2 */ /* last cacheline: 8 bytes */ }; # Then looking if PATH_MAX leaves some signature in the tests: │ if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) { ▒ │ cmp $0xfff,%rdi 0xfff == 4095 sizeof(augmented_args->filename.value) == PATH_MAX == 4096 [1] https://sysdig.com/blog/the-art-of-writing-ebpf-programs-a-primer/ Cc: Adrian Hunter Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: 
Gianluca Borello Cc: Jesper Dangaard Brouer Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves cc: Martin Lau Cc: Namhyung Kim Cc: Wang Nan Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-76gce2d2ghzq537ubwhjkone@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../examples/bpf/augmented_raw_syscalls.c | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 9f8b31ad7a490..2422894a81946 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -15,6 +15,7 @@ */ #include +#include #include /* bpf-output associated map */ @@ -41,7 +42,7 @@ struct syscall_exit_args { struct augmented_filename { unsigned int size; int reserved; - char value[256]; + char value[PATH_MAX]; }; /* syscalls where the first arg is a string */ @@ -119,23 +120,32 @@ struct augmented_filename { pid_filter(pids_filtered); +struct augmented_args_filename { + struct syscall_enter_args args; + struct augmented_filename filename; +}; + +bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1); + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { - struct { - struct syscall_enter_args args; - struct augmented_filename filename; - } augmented_args; - struct syscall *syscall; - unsigned int len = sizeof(augmented_args); + struct augmented_args_filename *augmented_args; + unsigned int len = sizeof(*augmented_args); const void *filename_arg = NULL; + struct syscall *syscall; + int key = 0; + + augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key); + if (augmented_args == NULL) + return 1; if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + probe_read(&augmented_args->args, sizeof(augmented_args->args), args); - syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr); + syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr); if (syscall == NULL || !syscall->enabled) return 0; /* @@ -191,7 +201,7 @@ int sys_enter(struct syscall_enter_args *args) * processor architecture, making the kernel part the same no matter what * kernel version or processor architecture it runs on. 
*/ - switch (augmented_args.args.syscall_nr) { + switch (augmented_args->args.syscall_nr) { case SYS_ACCT: case SYS_ADD_KEY: case SYS_CHDIR: @@ -263,20 +273,20 @@ int sys_enter(struct syscall_enter_args *args) } if (filename_arg != NULL) { - augmented_args.filename.reserved = 0; - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, - sizeof(augmented_args.filename.value), + augmented_args->filename.reserved = 0; + augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, + sizeof(augmented_args->filename.value), filename_arg); - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; - len &= sizeof(augmented_args.filename.value) - 1; + if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) { + len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size; + len &= sizeof(augmented_args->filename.value) - 1; } } else { - len = sizeof(augmented_args.args); + len = sizeof(augmented_args->args); } /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len); } SEC("raw_syscalls:sys_exit") -- GitLab From b64f1cc6d02ce92f7752545c0bd82dc052013167 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Mar 2019 10:16:43 -0300 Subject: [PATCH 0409/1861] perf trace beauty renameat: No need to include linux/fs.h There is no use for what is in that file, as everything is built by the tools/perf/trace/beauty/rename_flags.sh script from the copied kernel headers, the end result being: $ cat /tmp/build/perf/trace/beauty/generated/rename_flags_array.c static const char *rename_flags[] = { [0 + 1] = "NOREPLACE", [1 + 1] = "EXCHANGE", [2 + 1] = "WHITEOUT", }; $ I.e. no use of any defines from uapi/linux/fs.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-lgugmfa8z4bpw5zsbuoitllb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/renameat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/trace/beauty/renameat.c b/tools/perf/trace/beauty/renameat.c index 6dab340cc506b..852d2e271833f 100644 --- a/tools/perf/trace/beauty/renameat.c +++ b/tools/perf/trace/beauty/renameat.c @@ -2,7 +2,6 @@ // Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo #include "trace/beauty/beauty.h" -#include static size_t renameat2__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) { -- GitLab From 514c54039da970f953164c1960d0284f87db969d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 27 Mar 2019 15:22:52 -0300 Subject: [PATCH 0410/1861] perf tools: Add header defining used namespace struct to event.h When adding the 'struct namespaces_event' to event.h, referencing the 'struct perf_ns_link_info' type, we forgot to add the header where it is defined, getting that definition only by sheer luck. 
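For reference, perf_ns_link_info is the small device/inode pair that PERF_RECORD_NAMESPACES carries per namespace. Its uapi definition (quoted here for convenience from include/uapi/linux/perf_event.h) is essentially:

	struct perf_ns_link_info {
		__u64	dev;
		__u64	ino;
	};

so event.h genuinely needs that header rather than picking the type up transitively.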
Cc: Adrian Hunter Cc: Hari Bathini Cc: Jiri Olsa Cc: Namhyung Kim Fixes: f3b3614a284d ("perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: https://lkml.kernel.org/n/tip-qkrld0v7boc9uabjbd8csxux@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 36ae7e92dab1d..4e908ec1ef649 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "../perf.h" #include "build-id.h" -- GitLab From 7fcfa9a2d9a7c1b428d61992c2deaa9e37a437b0 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 29 Mar 2019 14:33:37 +0100 Subject: [PATCH 0411/1861] perf list: Fix s390 counter long description for L1D_RO_EXCL_WRITES Command # perf list --long-desc pmu lists the long description of the available counters. For counter named L1D_RO_EXCL_WRITES on machine types 3906 and 3907 the long description contains the counter number 'Counter:128 Name:' prefix. This is wrong. The fix changes the description text and removes this prefix. Output before: [root@m35lp76 perf]# ./perf list --long-desc pmu ... L1D_ONDRAWER_L4_SOURCED_WRITES [A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache] L1D_RO_EXCL_WRITES [Counter:128 Name:L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line] ... Output after: [root@m35lp76 perf]# ./perf list --long-desc pmu ... L1D_ONDRAWER_L4_SOURCED_WRITES [A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache] L1D_RO_EXCL_WRITES [L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line] ... 
Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Fixes: 109d59b900e7 ("perf vendor events s390: Add JSON files for IBM z14") Link: http://lkml.kernel.org/r/20190329133337.60255-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/s390/cf_z14/extended.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json index e7a3524b748f0..68618152ea2c6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json @@ -4,7 +4,7 @@ "EventCode": "128", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", - "PublicDescription": "Counter:128 Name:L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" + "PublicDescription": "L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" }, { "Unit": "CPU-M-CF", -- GitLab From c2b3c170db610896e4e633cba2135045333811c2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Mar 2019 15:18:20 -0700 Subject: [PATCH 0412/1861] perf stat: Revert checks for duration_time This reverts e864c5ca145e ("perf stat: Hide internal duration_time counter") but doing it manually since the code has now moved to a different file. The next patch will properly implement duration_time as a full event, so no need to hide it anymore. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190326221823.11518-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 6d043c78f3c20..3324f23c7efcf 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -18,11 +18,6 @@ #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" -static bool is_duration_time(struct perf_evsel *evsel) -{ - return !strcmp(evsel->name, "duration_time"); -} - static void print_running(struct perf_stat_config *config, u64 run, u64 ena) { @@ -628,9 +623,6 @@ static void print_aggr(struct perf_stat_config *config, ad.id = id = config->aggr_map->map[s]; first = true; evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; - ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(config, counter, aggr_cb, &ad)) @@ -848,8 +840,6 @@ static void print_no_aggr_metric(struct perf_stat_config *config, if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; if (first) { aggr_printout(config, counter, cpu, 0); first = false; @@ -906,8 +896,6 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; @@ -1136,15 +1124,11 @@ perf_evlist__print_counters(struct perf_evlist *evlist, break; case AGGR_THREAD: evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - 
continue; print_aggr_thread(config, _target, counter, prefix); } break; case AGGR_GLOBAL: evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; print_counter_aggr(config, counter, prefix); } if (metric_only) @@ -1155,8 +1139,6 @@ perf_evlist__print_counters(struct perf_evlist *evlist, print_no_aggr_metric(config, evlist, prefix); else { evlist__for_each_entry(evlist, counter) { - if (is_duration_time(counter)) - continue; print_counter(config, counter, prefix); } } -- GitLab From f0fbb114e3025f3f737a1e1c5c39c5b2b2e671bd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Mar 2019 15:18:21 -0700 Subject: [PATCH 0413/1861] perf stat: Implement duration_time as a proper event The perf metric expressions use 'duration_time' internally to normalize events. Normal 'perf stat' without -x also prints the duration time. But when using -x, the interval is not output anywhere, which is inconvenient for any post processing which often wants to normalize values to time. So implement 'duration_time' as a proper perf event that can be specified explicitly with -e. The previous implementation of 'duration_time' only worked for metric processing. This adds the concept of a tool event that is handled by the tool. On the kernel level it is still mapped to the dummy software event, but the values are not read anymore, but instead computed by the tool. Add proper plumbing to handle this in the event parser, and display it in 'perf stat'. We don't want 'duration_time' to be added up, so it's only printed for the first CPU. % perf stat -e duration_time,cycles true Performance counter stats for 'true': 555,476 ns duration_time 771,958 cycles 0.000555476 seconds time elapsed 0.000644000 seconds user 0.000000000 seconds sys Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190326221823.11518-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 28 ++++++++++++++++++------- tools/perf/util/evsel.h | 6 ++++++ tools/perf/util/parse-events.c | 38 +++++++++++++++++++++++++++++----- tools/perf/util/parse-events.h | 4 ++++ tools/perf/util/parse-events.l | 11 +++++++++- tools/perf/util/parse-events.y | 12 +++++++++++ 6 files changed, 86 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 49ee3c2033ecb..7f9c4b7f5d69a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -244,11 +244,25 @@ perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, process_synthesized_event, NULL); } +static int read_single_counter(struct perf_evsel *counter, int cpu, + int thread, struct timespec *rs) +{ + if (counter->tool_event == PERF_TOOL_DURATION_TIME) { + u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; + struct perf_counts_values *count = + perf_counts(counter->counts, cpu, thread); + count->ena = count->run = val; + count->val = val; + return 0; + } + return perf_evsel__read_counter(counter, cpu, thread); +} + /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter(struct perf_evsel *counter) +static int read_counter(struct perf_evsel *counter, struct timespec *rs) { int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; @@ -275,7 +289,7 @@ static int read_counter(struct perf_evsel *counter) * (via perf_evsel__read_counter) and sets threir count->loaded. 
*/ if (!count->loaded && - perf_evsel__read_counter(counter, cpu, thread)) { + read_single_counter(counter, cpu, thread, rs)) { counter->counts->scaled = -1; perf_counts(counter->counts, cpu, thread)->ena = 0; perf_counts(counter->counts, cpu, thread)->run = 0; @@ -304,13 +318,13 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void read_counters(void) +static void read_counters(struct timespec *rs) { struct perf_evsel *counter; int ret; evlist__for_each_entry(evsel_list, counter) { - ret = read_counter(counter); + ret = read_counter(counter, rs); if (ret) pr_debug("failed to read counter %s\n", counter->name); @@ -323,11 +337,11 @@ static void process_interval(void) { struct timespec ts, rs; - read_counters(); - clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); + read_counters(&rs); + if (STAT_RECORD) { if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) pr_err("failed to write stat round event\n"); @@ -593,7 +607,7 @@ try_again: * avoid arbitrary skew, we must read all counters before closing any * group leaders. */ - read_counters(); + read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); perf_evlist__close(evsel_list); return WEXITSTATUS(status); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0f2c6c93d7215..6d190cbf10702 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -75,6 +75,11 @@ struct perf_stat_evsel; typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); +enum perf_tool_event { + PERF_TOOL_NONE = 0, + PERF_TOOL_DURATION_TIME = 1, +}; + /** struct perf_evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -121,6 +126,7 @@ struct perf_evsel { unsigned int sample_size; int id_pos; int is_pos; + enum perf_tool_event tool_event; bool uniquified_name; bool snapshot; bool supported; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5ef4939408f2a..98c0fadaedb92 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -317,10 +317,12 @@ static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct perf_pmu *pmu, - struct list_head *config_terms, bool auto_merge_stats) + struct list_head *config_terms, bool auto_merge_stats, + const char *cpu_list) { struct perf_evsel *evsel; - struct cpu_map *cpus = pmu ? pmu->cpus : NULL; + struct cpu_map *cpus = pmu ? pmu->cpus : + cpu_list ? cpu_map__new(cpu_list) : NULL; event_attr_init(attr); @@ -348,7 +350,25 @@ static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms) { - return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 
0 : -ENOMEM; +} + +static int add_event_tool(struct list_head *list, int *idx, + enum perf_tool_event tool_event) +{ + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_DUMMY, + }; + + evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0"); + if (!evsel) + return -ENOMEM; + evsel->tool_event = tool_event; + if (tool_event == PERF_TOOL_DURATION_TIME) + evsel->unit = strdup("ns"); + return 0; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -1233,6 +1253,13 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, get_config_name(head_config), &config_terms); } +int parse_events_add_tool(struct parse_events_state *parse_state, + struct list_head *list, + enum perf_tool_event tool_event) +{ + return add_event_tool(list, &parse_state->idx, tool_event); +} + int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -1267,7 +1294,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, + auto_merge_stats, NULL); if (evsel) { evsel->pmu_name = name; evsel->use_uncore_alias = use_uncore_alias; @@ -1295,7 +1323,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, get_config_name(head_config), pmu, - &config_terms, auto_merge_stats); + &config_terms, auto_merge_stats, NULL); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 5ed035cbcbb72..0c1f5b98f6366 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -160,6 +160,10 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); +enum perf_tool_event; +int parse_events_add_tool(struct parse_events_state *parse_state, + struct list_head *list, + enum perf_tool_event tool_event); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7805c71aaae2e..c54bfe88626c1 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -15,6 +15,7 @@ #include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" +#include "evsel.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); @@ -154,6 +155,14 @@ static int sym(yyscan_t scanner, int type, int config) return type == PERF_TYPE_HARDWARE ? 
PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; } +static int tool(yyscan_t scanner, enum perf_tool_event event) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + + yylval->num = event; + return PE_VALUE_SYM_TOOL; +} + static int term(yyscan_t scanner, int type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -322,7 +331,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 44819bdb037da..6ad8d4914969b 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -14,6 +14,7 @@ #include #include "util.h" #include "pmu.h" +#include "evsel.h" #include "debug.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -45,6 +46,7 @@ static void inc_group_count(struct list_head *list, %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME %token PE_NAME %token PE_BPF_OBJECT PE_BPF_SOURCE @@ -58,6 +60,7 @@ static void inc_group_count(struct list_head *list, %type PE_VALUE %type PE_VALUE_SYM_HW %type PE_VALUE_SYM_SW +%type PE_VALUE_SYM_TOOL %type PE_RAW %type PE_TERM %type PE_NAME @@ -321,6 +324,15 @@ value_sym sep_slash_slash_dc ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL)); $$ = list; } +| +PE_VALUE_SYM_TOOL sep_slash_slash_dc +{ + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_tool(_parse_state, list, $1)); + $$ = list; +} event_legacy_cache: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config -- GitLab From 3371f389e4be6efc496ca395b21911a8f2c2d23f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Mar 2019 15:18:22 -0700 Subject: [PATCH 0414/1861] perf evsel: Support printing evsel name for 'duration_time' Implement printing the correct name for duration_time Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190326221823.11518-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 66d066f18b5b2..84cfb9fe2fc6a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -580,6 +580,12 @@ static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size) return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } +static int perf_evsel__tool_name(char *bf, size_t size) +{ + int ret = scnprintf(bf, size, "duration_time"); + return ret; +} + const char *perf_evsel__name(struct perf_evsel *evsel) { char bf[128]; @@ -601,7 +607,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel) break; case PERF_TYPE_SOFTWARE: - perf_evsel__sw_name(evsel, bf, sizeof(bf)); + if (evsel->tool_event) + perf_evsel__tool_name(bf, sizeof(bf)); + else + perf_evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: -- GitLab From 
5e0861baa3fa73e8bd861a4b7ba7fa992b1dff82 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Mar 2019 15:18:23 -0700 Subject: [PATCH 0415/1861] perf list: Output tool events Add support in 'perf list' to output tool internal events, currently only 'duration_time'. Committer testing: $ perf list dur* List of pre-defined events (to be used in -e): duration_time [Tool event] Metric Groups: $ perf list sw List of pre-defined events (to be used in -e): alignment-faults [Software event] bpf-output [Software event] context-switches OR cs [Software event] cpu-clock [Software event] cpu-migrations OR migrations [Software event] dummy [Software event] emulation-faults [Software event] major-faults [Software event] minor-faults [Software event] page-faults OR faults [Software event] task-clock [Software event] duration_time [Tool event] $ perf list | grep duration duration_time [Tool event] [L1D miss outstandings duration in cycles] page walk duration are excluded in Skylake] load. EPT page walk duration are excluded in Skylake] page walk duration are excluded in Skylake] store. EPT page walk duration are excluded in Skylake] (instruction fetch) request. EPT page walk duration are excluded in instruction fetch request. EPT page walk duration are excluded in $ Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190326221823.11518-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 6 ++++-- tools/perf/util/parse-events.c | 20 ++++++++++++++++++++ tools/perf/util/parse-events.h | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index a8394b4f11674..e0312a1c4792a 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -70,10 +70,11 @@ int cmd_list(int argc, const char **argv) print_symbol_events(NULL, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || - strcmp(argv[i], "software") == 0) + strcmp(argv[i], "software") == 0) { print_symbol_events(NULL, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); - else if (strcmp(argv[i], "cache") == 0 || + print_tool_events(NULL, raw_dump); + } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) @@ -113,6 +114,7 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); print_symbol_events(s, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); + print_tool_events(s, raw_dump); print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump, !desc_flag, long_desc_flag, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 98c0fadaedb92..4432bfe039fd9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2457,6 +2457,25 @@ out_enomem: return evt_num; } +static void print_tool_event(const char *name, const char *event_glob, + bool name_only) +{ + if (event_glob && !strglobmatch(name, event_glob)) + return; + if (name_only) + printf("%s ", name); + else + printf(" %-50s [%s]\n", name, "Tool event"); + +} + +void print_tool_events(const char *event_glob, bool name_only) +{ + print_tool_event("duration_time", event_glob, name_only); + if (pager_in_use()) + printf("\n"); +} + void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) @@ 
-2540,6 +2559,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX, name_only); + print_tool_events(event_glob, name_only); print_hwcache_events(event_glob, name_only); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 0c1f5b98f6366..a052cd6ac63e4 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -204,6 +204,7 @@ extern struct event_symbol event_symbols_sw[]; void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only); +void print_tool_events(const char *event_glob, bool name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); -- GitLab From 328b82b7497787cf4e4856a216dbf4e8a54c06a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 12:43:06 -0400 Subject: [PATCH 0416/1861] tools lib traceevent: Handle trace_printk() "%px" With security updates, %p in the kernel is hashed to protect true kernel locations. But trace_printk() is not allowed in production systems, and when a pointer is used, there are many times that the actual address is needed. "%px" produces the real address. But libtraceevent does not know how to handle that extension. Add it. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Link: http://lkml.kernel.org/r/20190401164342.837312153@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87494c7c619d8..2ebb257ce7ba9 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4338,6 +4338,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s case 'S': case 'f': case 'F': + case 'x': break; default: /* -- GitLab From fed33e905c4beea96a066686f8be2b489eb5068e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 12:43:07 -0400 Subject: [PATCH 0417/1861] tools lib traceevent: Add mono clocks to be parsed in seconds The mono clocks can display in seconds instead of whole numbers: trace-cmd-521 [001] 99176715281005: sched_waking: comm=kworker/u16:2 pid=32118 prio=120 target_cpu=002 trace-cmd-521 [001] 99176715286349: sched_wake_idle_without_ipi: cpu=2 trace-cmd-521 [001] 99176715288047: sched_wakeup: kworker/u16:2:32118 [120] success=1 CPU:002 trace-cmd-521 [001] 99176715290022: sched_waking: comm=trace-cmd pid=523 prio=120 target_cpu=000 trace-cmd-521 [001] 99176715292332: sched_wake_idle_without_ipi: cpu=0 trace-cmd-521 [001] 99176715292855: sched_wakeup: trace-cmd:523 [120] success=1 CPU:000 trace-cmd-521 [001] 99176715300697: sched_stat_runtime: comm=trace-cmd pid=521 runtime=80233 [ns] vruntime=66705540554 [ns Break it up in seconds: trace-cmd-521 [001] 99176.715281: sched_waking: comm=kworker/u16:2 pid=32118 prio=120 target_cpu=002 trace-cmd-521 [001] 99176.715286: sched_wake_idle_without_ipi: cpu=2 trace-cmd-521 [001] 99176.715288: sched_wakeup: kworker/u16:2:32118 [120] success=1 CPU:002 trace-cmd-521 [001] 99176.715290: sched_waking: comm=trace-cmd pid=523 prio=120 target_cpu=000 trace-cmd-521 [001] 99176.715292: sched_wake_idle_without_ipi: cpu=0 trace-cmd-521 [001] 99176.715293: 
sched_wakeup: trace-cmd:523 [120] success=1 CPU:000 trace-cmd-521 [001] 99176.715301: sched_stat_runtime: comm=trace-cmd pid=521 runtime=80233 [ns] vruntime=66705540554 [ns] Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Link: http://lkml.kernel.org/r/20190401164342.976554023@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2ebb257ce7ba9..666e23ded6ed4 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5445,7 +5445,8 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) return true; if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global") - || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf")) + || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf") + || !strncmp(trace_clock, "mono", 4)) return true; /* trace_clock is setting in tsc or counter mode */ -- GitLab From 6699ed712a97f70267e017f509126b890f6f6b28 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:08 -0400 Subject: [PATCH 0418/1861] tools lib traceevent: Implement a new API, tep_list_events_copy() Existing API tep_list_events() is not thread safe, it uses the internal array sort_events to keep cache of the sorted events and reuses it. This patch implements a new API, tep_list_events_copy(), which allocates new sorted array each time it is called. It could be used when a sorted events functionality is needed in thread safe use cases. It is up to the caller to free the array. Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20181218133013.31094-1-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164343.117437443@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 109 +++++++++++++++++++++++------ tools/lib/traceevent/event-parse.h | 5 +- 2 files changed, 91 insertions(+), 23 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 666e23ded6ed4..c00aebab3c335 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5651,32 +5651,26 @@ static int events_system_cmp(const void *a, const void *b) return events_id_cmp(a, b); } -struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type) +static struct tep_event **list_events_copy(struct tep_handle *tep) { struct tep_event **events; - int (*sort)(const void *a, const void *b); - - events = pevent->sort_events; - - if (events && pevent->last_type == sort_type) - return events; - if (!events) { - events = malloc(sizeof(*events) * (pevent->nr_events + 1)); - if (!events) - return NULL; + if (!tep) + return NULL; - memcpy(events, pevent->events, sizeof(*events) * pevent->nr_events); - events[pevent->nr_events] = NULL; + events = malloc(sizeof(*events) * (tep->nr_events + 1)); + if (!events) + return NULL; - pevent->sort_events = events; + memcpy(events, tep->events, sizeof(*events) * tep->nr_events); + events[tep->nr_events] = NULL; + return events; +} - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) { - pevent->last_type = sort_type; - return events; - } - } +static void list_events_sort(struct 
tep_event **events, int nr_events, + enum tep_event_sort_type sort_type) +{ + int (*sort)(const void *a, const void *b); switch (sort_type) { case TEP_EVENT_SORT_ID: @@ -5689,11 +5683,82 @@ struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sor sort = events_system_cmp; break; default: + sort = NULL; + } + + if (sort) + qsort(events, nr_events, sizeof(*events), sort); +} + +/** + * tep_list_events - Get events, sorted by given criteria. + * @tep: a handle to the tep context + * @sort_type: desired sort order of the events in the array + * + * Returns an array of pointers to all events, sorted by the given + * @sort_type criteria. The last element of the array is NULL. The returned + * memory must not be freed, it is managed by the library. + * The function is not thread safe. + */ +struct tep_event **tep_list_events(struct tep_handle *tep, + enum tep_event_sort_type sort_type) +{ + struct tep_event **events; + + if (!tep) + return NULL; + + events = tep->sort_events; + if (events && tep->last_type == sort_type) return events; + + if (!events) { + events = list_events_copy(tep); + if (!events) + return NULL; + + tep->sort_events = events; + + /* the internal events are sorted by id */ + if (sort_type == TEP_EVENT_SORT_ID) { + tep->last_type = sort_type; + return events; + } } - qsort(events, pevent->nr_events, sizeof(*events), sort); - pevent->last_type = sort_type; + list_events_sort(events, tep->nr_events, sort_type); + tep->last_type = sort_type; + + return events; +} + + +/** + * tep_list_events_copy - Thread safe version of tep_list_events() + * @tep: a handle to the tep context + * @sort_type: desired sort order of the events in the array + * + * Returns an array of pointers to all events, sorted by the given + * @sort_type criteria. The last element of the array is NULL. The returned + * array is newly allocated inside the function and must be freed by the caller + */ +struct tep_event **tep_list_events_copy(struct tep_handle *tep, + enum tep_event_sort_type sort_type) +{ + struct tep_event **events; + + if (!tep) + return NULL; + + events = list_events_copy(tep); + if (!events) + return NULL; + + /* the internal events are sorted by id */ + if (sort_type == TEP_EVENT_SORT_ID) + return events; + + list_events_sort(events, tep->nr_events, sort_type); return events; } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index aec48f2aea8af..41159358abc21 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -544,7 +544,10 @@ void tep_event_info(struct trace_seq *s, struct tep_event *event, int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum, char *buf, size_t buflen); -struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type); +struct tep_event **tep_list_events(struct tep_handle *tep, + enum tep_event_sort_type); +struct tep_event **tep_list_events_copy(struct tep_handle *tep, + enum tep_event_sort_type); struct tep_format_field **tep_event_common_fields(struct tep_event *event); struct tep_format_field **tep_event_fields(struct tep_event *event); -- GitLab From 70df6a7311186a7ab0b19f481dee4ca540a73837 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 1 Apr 2019 12:43:09 -0400 Subject: [PATCH 0419/1861] tools lib traceevent: Add more debugging to see various internal ring buffer entries When trace-cmd report --debug is set, show the internal ring buffer entries like time-extends and padding. 
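A debugging tool could then walk a mapped sub-buffer entry by entry, roughly like this (a minimal sketch against the kbuffer_raw_get()/kbuffer_raw_info declarations added below; the kbuf handle and the mapped subbuf pointer are assumed to be set up by the caller):

	struct kbuffer_raw_info info;

	/* start at the first entry after the sub-buffer header */
	info.next = subbuf + kbuffer_start_of_data(kbuf);
	while (kbuffer_raw_get(kbuf, subbuf, &info))
		printf("type=%d len=%d delta=%llu\n",
		       info.type, info.length, info.delta);

kbuffer_raw_get() fills in the type, length and timestamp delta of the entry at info.next, advances info.next, and returns NULL once it runs past the end of the sub-buffer's data, ending the loop.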
This requires adding new kbuffer API to retrieve these items. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Link: http://lkml.kernel.org/r/20190401164343.257591565@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/kbuffer-parse.c | 49 ++++++++++++++++++++++++++++ tools/lib/traceevent/kbuffer.h | 13 ++++++++ 2 files changed, 62 insertions(+) diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index af2a1f3b74241..b887e7437d674 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -727,3 +727,52 @@ int kbuffer_start_of_data(struct kbuffer *kbuf) { return kbuf->start; } + +/** + * kbuffer_raw_get - get raw buffer info + * @kbuf: The kbuffer + * @subbuf: Start of mapped subbuffer + * @info: Info descriptor to fill in + * + * For debugging. This can return internals of the ring buffer. + * Expects to have info->next set to what it will read. + * The type, length and timestamp delta will be filled in, and + * @info->next will be updated to the next element. + * The @subbuf is used to know if the info is passed the end of + * data and NULL will be returned if it is. + */ +struct kbuffer_raw_info * +kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, struct kbuffer_raw_info *info) +{ + unsigned long long flags; + unsigned long long delta; + unsigned int type_len; + unsigned int size; + int start; + int length; + void *ptr = info->next; + + if (!kbuf || !subbuf) + return NULL; + + if (kbuf->flags & KBUFFER_FL_LONG_8) + start = 16; + else + start = 12; + + flags = read_long(kbuf, subbuf + 8); + size = (unsigned int)flags & COMMIT_MASK; + + if (ptr < subbuf || ptr >= subbuf + start + size) + return NULL; + + type_len = translate_data(kbuf, ptr, &ptr, &delta, &length); + + info->next = ptr + length; + + info->type = type_len; + info->delta = delta; + info->length = length; + + return info; +} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index 03dce757553f1..ed4d697fc1378 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -65,4 +65,17 @@ int kbuffer_subbuffer_size(struct kbuffer *kbuf); void kbuffer_set_old_format(struct kbuffer *kbuf); int kbuffer_start_of_data(struct kbuffer *kbuf); +/* Debugging */ + +struct kbuffer_raw_info { + int type; + int length; + unsigned long long delta; + void *next; +}; + +/* Read raw data */ +struct kbuffer_raw_info *kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, + struct kbuffer_raw_info *info); + #endif /* _K_BUFFER_H */ -- GitLab From 489b34948cbbc725755e1b1125dbdb16401eea07 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:10 -0400 Subject: [PATCH 0420/1861] tools lib traceevent: Change description of few APIs APIs descriptions should describe the purpose of the function, its parameters and return value. While working on man pages implementation, I noticed mismatches in the descriptions of few APIs. 
This patch changes the description of these APIs, making them consistent with the man pages: - tep_print_num_field() - tep_print_func_field() - tep_get_header_page_size() - tep_get_long_size() - tep_set_long_size() - tep_get_page_size() - tep_set_page_size() Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190325145017.30246-2-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164343.396759247@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 20 ++++++++++---------- tools/lib/traceevent/event-parse.c | 6 ++++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index d463761a58f43..3686a221e981d 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -43,8 +43,8 @@ int tep_get_events_count(struct tep_handle *tep) * @flag: flag, or combination of flags to be set * can be any combination from enum tep_flag * - * This sets a flag or mbination of flags from enum tep_flag - */ + * This sets a flag or combination of flags from enum tep_flag + */ void tep_set_flag(struct tep_handle *tep, int flag) { if(tep) @@ -140,10 +140,10 @@ void tep_set_cpus(struct tep_handle *pevent, int cpus) } /** - * tep_get_long_size - get the size of a long integer on the current machine + * tep_get_long_size - get the size of a long integer on the traced machine * @pevent: a handle to the tep_handle * - * This returns the size of a long integer on the current machine + * This returns the size of a long integer on the traced machine * If @pevent is NULL, 0 is returned. */ int tep_get_long_size(struct tep_handle *pevent) @@ -154,11 +154,11 @@ int tep_get_long_size(struct tep_handle *pevent) } /** - * tep_set_long_size - set the size of a long integer on the current machine + * tep_set_long_size - set the size of a long integer on the traced machine * @pevent: a handle to the tep_handle * @size: size, in bytes, of a long integer * - * This sets the size of a long integer on the current machine + * This sets the size of a long integer on the traced machine */ void tep_set_long_size(struct tep_handle *pevent, int long_size) { @@ -167,10 +167,10 @@ void tep_set_long_size(struct tep_handle *pevent, int long_size) } /** - * tep_get_page_size - get the size of a memory page on the current machine + * tep_get_page_size - get the size of a memory page on the traced machine * @pevent: a handle to the tep_handle * - * This returns the size of a memory page on the current machine + * This returns the size of a memory page on the traced machine * If @pevent is NULL, 0 is returned. 
*/ int tep_get_page_size(struct tep_handle *pevent) @@ -181,11 +181,11 @@ int tep_get_page_size(struct tep_handle *pevent) } /** - * tep_set_page_size - set the size of a memory page on the current machine + * tep_set_page_size - set the size of a memory page on the traced machine * @pevent: a handle to the tep_handle * @_page_size: size of a memory page, in bytes * - * This sets the size of a memory page on the current machine + * This sets the size of a memory page on the traced machine */ void tep_set_page_size(struct tep_handle *pevent, int _page_size) { diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index c00aebab3c335..000ab7514be7c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -6453,7 +6453,8 @@ int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, * @record: The record with the field name. * @err: print default error if failed. * - * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + * Returns positive value on success, negative in case of an error, + * or 0 if buffer is full. */ int tep_print_num_field(struct trace_seq *s, const char *fmt, struct tep_event *event, const char *name, @@ -6485,7 +6486,8 @@ int tep_print_num_field(struct trace_seq *s, const char *fmt, * @record: The record with the field name. * @err: print default error if failed. * - * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + * Returns positive value on success, negative in case of an error, + * or 0 if buffer is full. */ int tep_print_func_field(struct trace_seq *s, const char *fmt, struct tep_event *event, const char *name, -- GitLab From d5d2d05bd5b02efa8545a3323a60465de9efb21e Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:11 -0400 Subject: [PATCH 0421/1861] tools lib traceevent: Coding style fixes Fixed few coding style problems in event-parse-api.c Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190325145017.30246-3-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164343.537086316@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 3686a221e981d..3716a9142aef9 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -32,7 +32,7 @@ struct tep_event *tep_get_first_event(struct tep_handle *tep) */ int tep_get_events_count(struct tep_handle *tep) { - if(tep) + if (tep) return tep->nr_events; return 0; } @@ -47,7 +47,7 @@ int tep_get_events_count(struct tep_handle *tep) */ void tep_set_flag(struct tep_handle *tep, int flag) { - if(tep) + if (tep) tep->flags |= flag; } @@ -108,7 +108,7 @@ tep_data2host8(struct tep_handle *pevent, unsigned long long data) */ int tep_get_header_page_size(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->header_page_size_size; return 0; } @@ -122,7 +122,7 @@ int tep_get_header_page_size(struct tep_handle *pevent) */ int tep_get_cpus(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->cpus; return 0; } @@ -135,7 +135,7 @@ int tep_get_cpus(struct tep_handle *pevent) */ void tep_set_cpus(struct tep_handle *pevent, int cpus) { - if(pevent) + if (pevent) pevent->cpus = cpus; } @@ 
-148,7 +148,7 @@ void tep_set_cpus(struct tep_handle *pevent, int cpus) */ int tep_get_long_size(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->long_size; return 0; } @@ -162,7 +162,7 @@ int tep_get_long_size(struct tep_handle *pevent) */ void tep_set_long_size(struct tep_handle *pevent, int long_size) { - if(pevent) + if (pevent) pevent->long_size = long_size; } @@ -175,7 +175,7 @@ void tep_set_long_size(struct tep_handle *pevent, int long_size) */ int tep_get_page_size(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->page_size; return 0; } @@ -189,7 +189,7 @@ int tep_get_page_size(struct tep_handle *pevent) */ void tep_set_page_size(struct tep_handle *pevent, int _page_size) { - if(pevent) + if (pevent) pevent->page_size = _page_size; } @@ -202,7 +202,7 @@ void tep_set_page_size(struct tep_handle *pevent, int _page_size) */ int tep_file_bigendian(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->file_bigendian; return 0; } @@ -216,7 +216,7 @@ int tep_file_bigendian(struct tep_handle *pevent) */ void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian) { - if(pevent) + if (pevent) pevent->file_bigendian = endian; } @@ -229,7 +229,7 @@ void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian) */ int tep_is_host_bigendian(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->host_bigendian; return 0; } @@ -243,7 +243,7 @@ int tep_is_host_bigendian(struct tep_handle *pevent) */ void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian) { - if(pevent) + if (pevent) pevent->host_bigendian = endian; } @@ -256,7 +256,7 @@ void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian) */ int tep_is_latency_format(struct tep_handle *pevent) { - if(pevent) + if (pevent) return pevent->latency_format; return 0; } @@ -270,6 +270,6 @@ int tep_is_latency_format(struct tep_handle *pevent) */ void tep_set_latency_format(struct tep_handle *pevent, int lat) { - if(pevent) + if (pevent) pevent->latency_format = lat; } -- GitLab From 80c5526c8544ae76cba31fb9702ab8accac1f0f3 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:12 -0400 Subject: [PATCH 0422/1861] tools lib traceevent: Implement new traceevent APIs for accessing struct tep_handler fields As struct tep_handler definition is not exposed as part of libtraceevent API, its fields cannot be accessed directly by the library users. This patch implements new APIs, which can be used to access the struct tep_handler fields: tep_get_event() - retrieves an event pointer at a specific index tep_get_first_event() - is modified to use tep_get_event() tep_clear_flag() - clears a tep handle flag tep_test_flag() - test if a given flag is set tep_get_header_timestamp_size() - returns the size of the timestamp stored in the header. 
tep_get_cpus() - returns the number of CPUs tep_is_old_format() - returns true if data was created by an older kernel with the old data format tep_set_print_raw() - have the output print in the raw format tep_set_test_filters() - debugging utility for testing tep filters Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190325145017.30246-4-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164343.679629539@goodmis.org [ Renamed some newly added "pevent" to "tep" ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 106 ++++++++++++++++++++++++- tools/lib/traceevent/event-parse.h | 6 ++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 3716a9142aef9..2ac8b44854ce0 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -8,6 +8,22 @@ #include "event-parse-local.h" #include "event-utils.h" +/** + * tep_get_event - returns the event with the given index + * @tep: a handle to the tep_handle + * @index: index of the requested event, in the range 0 .. nr_events + * + * This returns pointer to the element of the events array with the given index + * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned. + */ +struct tep_event *tep_get_event(struct tep_handle *tep, int index) +{ + if (tep && tep->events && index < tep->nr_events) + return tep->events[index]; + + return NULL; +} + /** * tep_get_first_event - returns the first event in the events array * @tep: a handle to the tep_handle @@ -17,10 +33,7 @@ */ struct tep_event *tep_get_first_event(struct tep_handle *tep) { - if (tep && tep->events) - return tep->events[0]; - - return NULL; + return tep_get_event(tep, 0); } /** @@ -51,6 +64,34 @@ void tep_set_flag(struct tep_handle *tep, int flag) tep->flags |= flag; } +/** + * tep_clear_flag - clear event parser flag + * @tep: a handle to the tep_handle + * @flag: flag to be cleared + * + * This clears a tep flag + */ +void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag) +{ + if (tep) + tep->flags &= ~flag; +} + +/** + * tep_test_flag - check the state of event parser flag + * @tep: a handle to the tep_handle + * @flag: flag to be checked + * + * This returns the state of the requested tep flag. + * Returns: true if the flag is set, false otherwise. + */ +bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) +{ + if (tep) + return (tep->flags & flag); + return false; +} + unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data) { unsigned short swap; @@ -113,6 +154,20 @@ int tep_get_header_page_size(struct tep_handle *pevent) return 0; } +/** + * tep_get_header_timestamp_size - get size of the time stamp in the header page + * @tep: a handle to the tep_handle + * + * This returns size of the time stamp in the header page + * If @tep is NULL, 0 is returned. 
+ */ +int tep_get_header_timestamp_size(struct tep_handle *tep) +{ + if (tep) + return tep->header_page_ts_size; + return 0; +} + /** * tep_get_cpus - get the number of CPUs * @pevent: a handle to the tep_handle @@ -273,3 +328,46 @@ void tep_set_latency_format(struct tep_handle *pevent, int lat) if (pevent) pevent->latency_format = lat; } + +/** + * tep_is_old_format - get if an old kernel is used + * @tep: a handle to the tep_handle + * + * This returns true, if an old kernel is used to generate the tracing events or + * false if a new kernel is used. Old kernels did not have header page info. + * If @tep is NULL, false is returned. + */ +bool tep_is_old_format(struct tep_handle *tep) +{ + if (tep) + return !!(tep->old_format); + return false; +} + +/** + * tep_set_print_raw - set a flag to force print in raw format + * @tep: a handle to the tep_handle + * @print_raw: the new value of the print_raw flag + * + * This sets a flag to force print in raw format + */ +void tep_set_print_raw(struct tep_handle *tep, int print_raw) +{ + if (tep) + tep->print_raw = print_raw; +} + +/** + * tep_set_test_filters - set a flag to test a filter string + * @tep: a handle to the tep_handle + * @test_filters: the new value of the test_filters flag + * + * This sets a flag to test a filter string. If this flag is set, when + * tep_filter_add_filter_str() API as called,it will print the filter string + * instead of adding it. + */ +void tep_set_test_filters(struct tep_handle *tep, int test_filters) +{ + if (tep) + tep->test_filters = test_filters; +} diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 41159358abc21..be082f9692f1e 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -409,6 +409,8 @@ void tep_print_plugins(struct trace_seq *s, typedef char *(tep_func_resolver_t)(void *priv, unsigned long long *addrp, char **modp); void tep_set_flag(struct tep_handle *tep, int flag); +void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); +bool tep_check_flags(struct tep_handle *tep, enum tep_flag flags); static inline int tep_host_bigendian(void) { @@ -568,6 +570,10 @@ void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian); int tep_is_latency_format(struct tep_handle *pevent); void tep_set_latency_format(struct tep_handle *pevent, int lat); int tep_get_header_page_size(struct tep_handle *pevent); +int tep_get_header_timestamp_size(struct tep_handle *tep); +bool tep_is_old_format(struct tep_handle *tep); +void tep_set_print_raw(struct tep_handle *tep, int print_raw); +void tep_set_test_filters(struct tep_handle *tep, int test_filters); struct tep_handle *tep_alloc(void); void tep_free(struct tep_handle *pevent); -- GitLab From 2ce4639f6936261bf6e0bae5c21a0c09a8daeacc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 12:43:13 -0400 Subject: [PATCH 0423/1861] tools lib traceevent: Removed unneeded !! and return parenthesis As return is not a function we do not need parenthesis around the return value. Also, a function returning bool does not need to add !!. 
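For illustration, both forms below return the same value, because C implicitly converts the return expression of a bool function to true or false (a minimal standalone sketch; is_old() and its parameter are hypothetical, not from the patch):

	#include <stdbool.h>

	static bool is_old(int old_format)
	{
		return !!(old_format);	/* works, but the !! and () are noise */
	}

	static bool is_old_cleaned(int old_format)
	{
		return old_format;	/* identical result: 0 -> false, nonzero -> true */
	}
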
Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tzvetomir Stoyanov Link: http://lkml.kernel.org/r/20190401164343.817886725@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 2ac8b44854ce0..002b3f73862b3 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -88,7 +88,7 @@ void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag) bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) { if (tep) - return (tep->flags & flag); + return tep->flags & flag; return false; } @@ -340,7 +340,7 @@ void tep_set_latency_format(struct tep_handle *pevent, int lat) bool tep_is_old_format(struct tep_handle *tep) { if (tep) - return !!(tep->old_format); + return tep->old_format; return false; } -- GitLab From a634b278ec2504fdd2acd694f5158ae79016fb70 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:14 -0400 Subject: [PATCH 0424/1861] tools lib traceevent: Remove tep filter trivial APIs This patch removes trivial filter tep APIs: enum tep_filter_trivial_type tep_filter_event_has_trivial() tep_update_trivial() tep_filter_clear_trivial() Trivial filters is an optimization, used only in the first version of KernelShark. The API is deprecated, the next KernelShark release does not use it. Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190326154328.28718-4-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164343.968458918@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.h | 16 --- tools/lib/traceevent/parse-filter.c | 169 ---------------------------- 2 files changed, 185 deletions(-) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index be082f9692f1e..3833d13845995 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -732,12 +732,6 @@ struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent); #define FILTER_MISS TEP_ERRNO__FILTER_MISS #define FILTER_MATCH TEP_ERRNO__FILTER_MATCH -enum tep_filter_trivial_type { - TEP_FILTER_TRIVIAL_FALSE, - TEP_FILTER_TRIVIAL_TRUE, - TEP_FILTER_TRIVIAL_BOTH, -}; - enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, const char *filter_str); @@ -752,9 +746,6 @@ int tep_event_filtered(struct tep_event_filter *filter, void tep_filter_reset(struct tep_event_filter *filter); -int tep_filter_clear_trivial(struct tep_event_filter *filter, - enum tep_filter_trivial_type type); - void tep_filter_free(struct tep_event_filter *filter); char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); @@ -762,15 +753,8 @@ char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); int tep_filter_remove_event(struct tep_event_filter *filter, int event_id); -int tep_filter_event_has_trivial(struct tep_event_filter *filter, - int event_id, - enum tep_filter_trivial_type type); - int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source); -int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source, - enum tep_filter_trivial_type type); - int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2); 
#endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index cb5ce66dab6e0..4ffd8b25a8527 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1522,167 +1522,6 @@ int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *sour return ret; } - -/** - * tep_update_trivial - update the trivial filters with the given filter - * @dest - the filter to update - * @source - the filter as the source of the update - * @type - the type of trivial filter to update. - * - * Scan dest for trivial events matching @type to replace with the source. - * - * Returns 0 on success and -1 if there was a problem updating, but - * events may have still been updated on error. - */ -int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source, - enum tep_filter_trivial_type type) -{ - struct tep_handle *src_pevent; - struct tep_handle *dest_pevent; - struct tep_event *event; - struct tep_filter_type *filter_type; - struct tep_filter_arg *arg; - char *str; - int i; - - src_pevent = source->pevent; - dest_pevent = dest->pevent; - - /* Do nothing if either of the filters has nothing to filter */ - if (!dest->filters || !source->filters) - return 0; - - for (i = 0; i < dest->filters; i++) { - filter_type = &dest->event_filters[i]; - arg = filter_type->filter; - if (arg->type != TEP_FILTER_ARG_BOOLEAN) - continue; - if ((arg->boolean.value && type == TEP_FILTER_TRIVIAL_FALSE) || - (!arg->boolean.value && type == TEP_FILTER_TRIVIAL_TRUE)) - continue; - - event = filter_type->event; - - if (src_pevent != dest_pevent) { - /* do a look up */ - event = tep_find_event_by_name(src_pevent, - event->system, - event->name); - if (!event) - return -1; - } - - str = tep_filter_make_string(source, event->id); - if (!str) - continue; - - /* Don't bother if the filter is trivial too */ - if (strcmp(str, "TRUE") != 0 && strcmp(str, "FALSE") != 0) - filter_event(dest, event, str, NULL); - free(str); - } - return 0; -} - -/** - * tep_filter_clear_trivial - clear TRUE and FALSE filters - * @filter: the filter to remove trivial filters from - * @type: remove only true, false, or both - * - * Removes filters that only contain a TRUE or FALES boolean arg. - * - * Returns 0 on success and -1 if there was a problem. - */ -int tep_filter_clear_trivial(struct tep_event_filter *filter, - enum tep_filter_trivial_type type) -{ - struct tep_filter_type *filter_type; - int count = 0; - int *ids = NULL; - int i; - - if (!filter->filters) - return 0; - - /* - * Two steps, first get all ids with trivial filters. - * then remove those ids. 
- */ - for (i = 0; i < filter->filters; i++) { - int *new_ids; - - filter_type = &filter->event_filters[i]; - if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN) - continue; - switch (type) { - case TEP_FILTER_TRIVIAL_FALSE: - if (filter_type->filter->boolean.value) - continue; - break; - case TEP_FILTER_TRIVIAL_TRUE: - if (!filter_type->filter->boolean.value) - continue; - default: - break; - } - - new_ids = realloc(ids, sizeof(*ids) * (count + 1)); - if (!new_ids) { - free(ids); - return -1; - } - - ids = new_ids; - ids[count++] = filter_type->event_id; - } - - if (!count) - return 0; - - for (i = 0; i < count; i++) - tep_filter_remove_event(filter, ids[i]); - - free(ids); - return 0; -} - -/** - * tep_filter_event_has_trivial - return true event contains trivial filter - * @filter: the filter with the information - * @event_id: the id of the event to test - * @type: trivial type to test for (TRUE, FALSE, EITHER) - * - * Returns 1 if the event contains a matching trivial type - * otherwise 0. - */ -int tep_filter_event_has_trivial(struct tep_event_filter *filter, - int event_id, - enum tep_filter_trivial_type type) -{ - struct tep_filter_type *filter_type; - - if (!filter->filters) - return 0; - - filter_type = find_filter_type(filter, event_id); - - if (!filter_type) - return 0; - - if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN) - return 0; - - switch (type) { - case TEP_FILTER_TRIVIAL_FALSE: - return !filter_type->filter->boolean.value; - - case TEP_FILTER_TRIVIAL_TRUE: - return filter_type->filter->boolean.value; - default: - return 1; - } -} - static int test_filter(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err); @@ -2409,14 +2248,6 @@ int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter break; if (filter_type1->filter->type != filter_type2->filter->type) break; - switch (filter_type1->filter->type) { - case TEP_FILTER_TRIVIAL_FALSE: - case TEP_FILTER_TRIVIAL_TRUE: - /* trivial types just need the type compared */ - continue; - default: - break; - } /* The best way to compare complex filters is with strings */ str1 = arg_to_str(filter1, filter_type1->filter); str2 = arg_to_str(filter2, filter_type2->filter); -- GitLab From fea6b632235b9bedc58c72cd24f1865bb0c365db Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:15 -0400 Subject: [PATCH 0425/1861] tools lib traceevent: Remove call to exit() from tep_filter_add_filter_str() This patch removes call to exit() from tep_filter_add_filter_str(). A library function should not force the application to exit. In the current implementation tep_filter_add_filter_str() calls exit() when a special "test_filters" mode is set, used only for debugging purposes. When this mode is set and a filter is added - its string is printed to the console and exit() is called. This patch changes the logic - when in "test_filters" mode, the filter string is still printed, but the exit() is not called. It is up to the application to track when "test_filters" mode is set and to call exit, if needed. 
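A debugging caller that relied on the old behavior would now look roughly like this (a sketch of application-side code, not part of the patch; tep, filter and filter_str are assumed to be set up by the caller):

	tep_set_test_filters(tep, 1);	/* debug mode: filters are printed, not added */
	ret = tep_filter_add_filter_str(filter, filter_str);
	exit(ret < 0 ? 1 : 0);		/* exiting is now the application's job */
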
Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190326154328.28718-9-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164344.121717482@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 4ffd8b25a8527..3320c0a0e343a 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1346,9 +1346,6 @@ enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, free_events(events); - if (rtn >= 0 && pevent->test_filters) - exit(0); - return rtn; } -- GitLab From 55c34ae076f62ed7ad0fc86cd8b697a6f577c431 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:16 -0400 Subject: [PATCH 0426/1861] tools tools, tools lib traceevent: Make traceevent APIs more consistent Rename some traceevent APIs for consistency: tep_pid_is_registered() to tep_is_pid_registered() tep_file_bigendian() to tep_is_file_bigendian() to make the names and return values consistent with other tep_is_... APIs tep_data_lat_fmt() to tep_data_latency_format() to make the name more descriptive tep_host_bigendian() to tep_is_bigendian() tep_set_host_bigendian() to tep_set_local_bigendian() tep_is_host_bigendian() to tep_is_local_bigendian() "host" can be confused with VMs, and "local" is about the local machine. All tep_is_..._bigendian(struct tep_handle *tep) APIs return the saved data in the tep handle, while tep_is_bigendian() returns the running machine's endianness. All tep_is_... functions are modified to return bool value, instead of int. Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190327141946.4353-2-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164344.288624897@goodmis.org [ Removed some extra parenthesis around return statements ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 40 +++++++++++++------------- tools/lib/traceevent/event-parse.c | 26 ++++++++--------- tools/lib/traceevent/event-parse.h | 16 +++++------ tools/lib/traceevent/plugin_kvm.c | 4 +-- tools/perf/util/trace-event-read.c | 2 +- tools/perf/util/trace-event.c | 4 +-- 6 files changed, 46 insertions(+), 46 deletions(-) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 002b3f73862b3..f7184575f0d98 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -155,10 +155,10 @@ int tep_get_header_page_size(struct tep_handle *pevent) } /** - * tep_get_header_timestamp_size - get size of the time stamp in the header page + * tep_get_header_timestamp_size - get size of the timestamp in the header page * @tep: a handle to the tep_handle * - * This returns size of the time stamp in the header page + * This returns size of the timestamp in the header page * If @tep is NULL, 0 is returned. 
*/ int tep_get_header_timestamp_size(struct tep_handle *tep) @@ -249,17 +249,17 @@ void tep_set_page_size(struct tep_handle *pevent, int _page_size) } /** - * tep_file_bigendian - get if the file is in big endian order + * tep_is_file_bigendian - return the endian of the file * @pevent: a handle to the tep_handle * - * This returns if the file is in big endian order - * If @pevent is NULL, 0 is returned. + * This returns true if the file is in big endian order + * If @pevent is NULL, false is returned. */ -int tep_file_bigendian(struct tep_handle *pevent) +bool tep_is_file_bigendian(struct tep_handle *pevent) { if (pevent) - return pevent->file_bigendian; - return 0; + return pevent->file_bigendian == TEP_BIG_ENDIAN; + return false; } /** @@ -276,27 +276,27 @@ void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian) } /** - * tep_is_host_bigendian - get if the order of the current host is big endian + * tep_is_local_bigendian - return the endian of the saved local machine * @pevent: a handle to the tep_handle * - * This gets if the order of the current host is big endian - * If @pevent is NULL, 0 is returned. + * This returns true if the saved local machine in @pevent is big endian. + * If @pevent is NULL, false is returned. */ -int tep_is_host_bigendian(struct tep_handle *pevent) +bool tep_is_local_bigendian(struct tep_handle *pevent) { if (pevent) - return pevent->host_bigendian; + return pevent->host_bigendian == TEP_BIG_ENDIAN; return 0; } /** - * tep_set_host_bigendian - set the order of the local host + * tep_set_local_bigendian - set the stored local machine endian order * @pevent: a handle to the tep_handle * @endian: non zero, if the local host has big endian order * - * This sets the order of the local host + * This sets the endian order for the local machine. */ -void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian) +void tep_set_local_bigendian(struct tep_handle *pevent, enum tep_endian endian) { if (pevent) pevent->host_bigendian = endian; @@ -306,14 +306,14 @@ void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian) * tep_is_latency_format - get if the latency output format is configured * @pevent: a handle to the tep_handle * - * This gets if the latency output format is configured - * If @pevent is NULL, 0 is returned. + * This returns true if the latency output format is configured + * If @pevent is NULL, false is returned. */ -int tep_is_latency_format(struct tep_handle *pevent) +bool tep_is_latency_format(struct tep_handle *pevent) { if (pevent) return pevent->latency_format; - return 0; + return false; } /** diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 000ab7514be7c..8836702122beb 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -199,23 +199,23 @@ static const char *find_cmdline(struct tep_handle *pevent, int pid) } /** - * tep_pid_is_registered - return if a pid has a cmdline registered + * tep_is_pid_registered - return if a pid has a cmdline registered * @pevent: handle for the pevent * @pid: The pid to check if it has a cmdline registered with. * - * Returns 1 if the pid has a cmdline mapped to it - * 0 otherwise. + * Returns true if the pid has a cmdline mapped to it + * false otherwise. 
*/ -int tep_pid_is_registered(struct tep_handle *pevent, int pid) +bool tep_is_pid_registered(struct tep_handle *pevent, int pid) { const struct tep_cmdline *comm; struct tep_cmdline key; if (!pid) - return 1; + return true; if (!pevent->cmdlines && cmdline_init(pevent)) - return 0; + return false; key.pid = pid; @@ -223,8 +223,8 @@ int tep_pid_is_registered(struct tep_handle *pevent, int pid) sizeof(*pevent->cmdlines), cmdline_cmp); if (comm) - return 1; - return 0; + return true; + return false; } /* @@ -5172,7 +5172,7 @@ out_failed: } /** - * tep_data_lat_fmt - parse the data for the latency format + * tep_data_latency_format - parse the data for the latency format * @pevent: a handle to the pevent * @s: the trace_seq to write to * @record: the record to read from @@ -5181,8 +5181,8 @@ out_failed: * need rescheduling, in hard/soft interrupt, preempt count * and lock depth) and places it into the trace_seq. */ -void tep_data_lat_fmt(struct tep_handle *pevent, - struct trace_seq *s, struct tep_record *record) +void tep_data_latency_format(struct tep_handle *pevent, + struct trace_seq *s, struct tep_record *record) { static int check_lock_depth = 1; static int check_migrate_disable = 1; @@ -5532,7 +5532,7 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, } if (pevent->latency_format) { - tep_data_lat_fmt(pevent, s, record); + tep_data_latency_format(pevent, s, record); } if (use_usec_format) { @@ -6827,7 +6827,7 @@ struct tep_handle *tep_alloc(void) if (pevent) { pevent->ref_count = 1; - pevent->host_bigendian = tep_host_bigendian(); + pevent->host_bigendian = tep_is_bigendian(); } return pevent; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 3833d13845995..d473dc5579781 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -412,7 +412,7 @@ void tep_set_flag(struct tep_handle *tep, int flag); void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); bool tep_check_flags(struct tep_handle *tep, enum tep_flag flags); -static inline int tep_host_bigendian(void) +static inline int tep_is_bigendian(void) { unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; unsigned int val; @@ -440,7 +440,7 @@ int tep_register_function(struct tep_handle *pevent, char *name, unsigned long long addr, char *mod); int tep_register_print_string(struct tep_handle *pevent, const char *fmt, unsigned long long addr); -int tep_pid_is_registered(struct tep_handle *pevent, int pid); +bool tep_is_pid_registered(struct tep_handle *pevent, int pid); void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, struct tep_event *event, @@ -525,8 +525,8 @@ tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *n struct tep_event * tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record); -void tep_data_lat_fmt(struct tep_handle *pevent, - struct trace_seq *s, struct tep_record *record); +void tep_data_latency_format(struct tep_handle *pevent, + struct trace_seq *s, struct tep_record *record); int tep_data_type(struct tep_handle *pevent, struct tep_record *rec); int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec); int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec); @@ -563,11 +563,11 @@ int tep_get_long_size(struct tep_handle *pevent); void tep_set_long_size(struct tep_handle *pevent, int long_size); int tep_get_page_size(struct tep_handle *pevent); void tep_set_page_size(struct tep_handle *pevent, int _page_size); -int 
tep_file_bigendian(struct tep_handle *pevent); +bool tep_is_file_bigendian(struct tep_handle *pevent); void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian); -int tep_is_host_bigendian(struct tep_handle *pevent); -void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian); -int tep_is_latency_format(struct tep_handle *pevent); +bool tep_is_local_bigendian(struct tep_handle *pevent); +void tep_set_local_bigendian(struct tep_handle *pevent, enum tep_endian endian); +bool tep_is_latency_format(struct tep_handle *pevent); void tep_set_latency_format(struct tep_handle *pevent, int lat); int tep_get_header_page_size(struct tep_handle *pevent); int tep_get_header_timestamp_size(struct tep_handle *tep); diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c index 64b9c25a1fd3f..688e5d97d7a73 100644 --- a/tools/lib/traceevent/plugin_kvm.c +++ b/tools/lib/traceevent/plugin_kvm.c @@ -389,8 +389,8 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, * We can only use the structure if file is of the same * endianness. */ - if (tep_file_bigendian(event->pevent) == - tep_is_host_bigendian(event->pevent)) { + if (tep_is_file_bigendian(event->pevent) == + tep_is_local_bigendian(event->pevent)) { trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s", role.level, diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index efe2f58cff4e4..48d53d8e3e168 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -442,7 +442,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) tep_set_flag(pevent, TEP_NSEC_OUTPUT); tep_set_file_bigendian(pevent, file_bigendian); - tep_set_host_bigendian(pevent, host_bigendian); + tep_set_local_bigendian(pevent, host_bigendian); if (do_read(buf, 1) < 0) goto out; diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index cbe0dd758e3ad..01b9d89bf5bfc 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -40,7 +40,7 @@ int trace_event__init(struct trace_event *t) static int trace_event__init2(void) { - int be = tep_host_bigendian(); + int be = tep_is_bigendian(); struct tep_handle *pevent; if (trace_event__init(&tevent)) @@ -49,7 +49,7 @@ static int trace_event__init2(void) pevent = tevent.pevent; tep_set_flag(pevent, TEP_NSEC_OUTPUT); tep_set_file_bigendian(pevent, be); - tep_set_host_bigendian(pevent, be); + tep_set_local_bigendian(pevent, be); tevent_initialized = true; return 0; } -- GitLab From 047ff221e3ab07129a3566683e95a4d142f7c3c0 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:17 -0400 Subject: [PATCH 0427/1861] tools lib traceevent: Rename input arguments of libtraceevent APIs from pevent to tep Input arguments of libtraceevent APIs are renamed from "struct tep_handle *pevent" to "struct tep_handle *tep". This makes the API consistent with the chosen naming convention: tep (trace event parser), instead of the old pevent. 
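[ Editorial note: taken together, patch 0426 and this patch change how callers
  spell the endian API. The sketch below is not part of either patch; it is a
  minimal, hypothetical caller built against tools/lib/traceevent's
  event-parse.h with both renames applied, and it mirrors the handle setup
  done by trace_event__init2() in tools/perf/util/trace-event.c further down
  in this diff: ]

	#include <stdio.h>
	#include "event-parse.h"

	int main(void)
	{
		struct tep_handle *tep = tep_alloc();	/* the handle is now "tep", not "pevent" */

		if (!tep)
			return 1;

		/*
		 * tep_is_bigendian() (was tep_host_bigendian()) probes the
		 * running machine; tep_set_local_bigendian() (was
		 * tep_set_host_bigendian()) stores that byte order in the
		 * handle.
		 */
		tep_set_local_bigendian(tep, tep_is_bigendian() ?
					     TEP_BIG_ENDIAN : TEP_LITTLE_ENDIAN);
		tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN);

		/* The tep_is_...() getters on the handle now return bool. */
		if (tep_is_file_bigendian(tep) == tep_is_local_bigendian(tep))
			printf("file and local machine byte order match\n");

		tep_free(tep);
		return 0;
	}

[ Note that tep_is_bigendian() itself still returns int: it is the old static
  inline probe, only renamed, while the per-handle tep_is_...() getters were
  converted to bool by patch 0426. ]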
Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190401132111.13727-2-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164344.465573837@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse-api.c | 132 +++--- tools/lib/traceevent/event-parse-local.h | 6 +- tools/lib/traceevent/event-parse.c | 452 ++++++++++----------- tools/lib/traceevent/event-parse.h | 123 +++--- tools/lib/traceevent/event-plugin.c | 8 +- tools/lib/traceevent/parse-filter.c | 8 +- tools/lib/traceevent/plugin_cfg80211.c | 8 +- tools/lib/traceevent/plugin_function.c | 8 +- tools/lib/traceevent/plugin_hrtimer.c | 12 +- tools/lib/traceevent/plugin_jbd2.c | 12 +- tools/lib/traceevent/plugin_kmem.c | 28 +- tools/lib/traceevent/plugin_kvm.c | 44 +- tools/lib/traceevent/plugin_mac80211.c | 8 +- tools/lib/traceevent/plugin_sched_switch.c | 16 +- tools/lib/traceevent/plugin_scsi.c | 8 +- tools/lib/traceevent/plugin_xen.c | 8 +- 16 files changed, 439 insertions(+), 442 deletions(-) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index f7184575f0d98..988587840c801 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -92,11 +92,11 @@ bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) return false; } -unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data) +unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data) { unsigned short swap; - if (!pevent || pevent->host_bigendian == pevent->file_bigendian) + if (!tep || tep->host_bigendian == tep->file_bigendian) return data; swap = ((data & 0xffULL) << 8) | @@ -105,11 +105,11 @@ unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data) return swap; } -unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data) +unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data) { unsigned int swap; - if (!pevent || pevent->host_bigendian == pevent->file_bigendian) + if (!tep || tep->host_bigendian == tep->file_bigendian) return data; swap = ((data & 0xffULL) << 24) | @@ -121,11 +121,11 @@ unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data) } unsigned long long -tep_data2host8(struct tep_handle *pevent, unsigned long long data) +tep_data2host8(struct tep_handle *tep, unsigned long long data) { unsigned long long swap; - if (!pevent || pevent->host_bigendian == pevent->file_bigendian) + if (!tep || tep->host_bigendian == tep->file_bigendian) return data; swap = ((data & 0xffULL) << 56) | @@ -142,15 +142,15 @@ tep_data2host8(struct tep_handle *pevent, unsigned long long data) /** * tep_get_header_page_size - get size of the header page - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns size of the header page - * If @pevent is NULL, 0 is returned. + * If @tep is NULL, 0 is returned. */ -int tep_get_header_page_size(struct tep_handle *pevent) +int tep_get_header_page_size(struct tep_handle *tep) { - if (pevent) - return pevent->header_page_size_size; + if (tep) + return tep->header_page_size_size; return 0; } @@ -170,163 +170,163 @@ int tep_get_header_timestamp_size(struct tep_handle *tep) /** * tep_get_cpus - get the number of CPUs - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns the number of CPUs - * If @pevent is NULL, 0 is returned. 
+ * If @tep is NULL, 0 is returned. */ -int tep_get_cpus(struct tep_handle *pevent) +int tep_get_cpus(struct tep_handle *tep) { - if (pevent) - return pevent->cpus; + if (tep) + return tep->cpus; return 0; } /** * tep_set_cpus - set the number of CPUs - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This sets the number of CPUs */ -void tep_set_cpus(struct tep_handle *pevent, int cpus) +void tep_set_cpus(struct tep_handle *tep, int cpus) { - if (pevent) - pevent->cpus = cpus; + if (tep) + tep->cpus = cpus; } /** * tep_get_long_size - get the size of a long integer on the traced machine - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns the size of a long integer on the traced machine - * If @pevent is NULL, 0 is returned. + * If @tep is NULL, 0 is returned. */ -int tep_get_long_size(struct tep_handle *pevent) +int tep_get_long_size(struct tep_handle *tep) { - if (pevent) - return pevent->long_size; + if (tep) + return tep->long_size; return 0; } /** * tep_set_long_size - set the size of a long integer on the traced machine - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @size: size, in bytes, of a long integer * * This sets the size of a long integer on the traced machine */ -void tep_set_long_size(struct tep_handle *pevent, int long_size) +void tep_set_long_size(struct tep_handle *tep, int long_size) { - if (pevent) - pevent->long_size = long_size; + if (tep) + tep->long_size = long_size; } /** * tep_get_page_size - get the size of a memory page on the traced machine - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns the size of a memory page on the traced machine - * If @pevent is NULL, 0 is returned. + * If @tep is NULL, 0 is returned. */ -int tep_get_page_size(struct tep_handle *pevent) +int tep_get_page_size(struct tep_handle *tep) { - if (pevent) - return pevent->page_size; + if (tep) + return tep->page_size; return 0; } /** * tep_set_page_size - set the size of a memory page on the traced machine - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @_page_size: size of a memory page, in bytes * * This sets the size of a memory page on the traced machine */ -void tep_set_page_size(struct tep_handle *pevent, int _page_size) +void tep_set_page_size(struct tep_handle *tep, int _page_size) { - if (pevent) - pevent->page_size = _page_size; + if (tep) + tep->page_size = _page_size; } /** * tep_is_file_bigendian - return the endian of the file - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns true if the file is in big endian order - * If @pevent is NULL, false is returned. + * If @tep is NULL, false is returned. 
*/ -bool tep_is_file_bigendian(struct tep_handle *pevent) +bool tep_is_file_bigendian(struct tep_handle *tep) { - if (pevent) - return pevent->file_bigendian == TEP_BIG_ENDIAN; + if (tep) + return (tep->file_bigendian == TEP_BIG_ENDIAN); return false; } /** * tep_set_file_bigendian - set if the file is in big endian order - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @endian: non zero, if the file is in big endian order * * This sets if the file is in big endian order */ -void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian) +void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian) { - if (pevent) - pevent->file_bigendian = endian; + if (tep) + tep->file_bigendian = endian; } /** * tep_is_local_bigendian - return the endian of the saved local machine - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * - * This returns true if the saved local machine in @pevent is big endian. - * If @pevent is NULL, false is returned. + * This returns true if the saved local machine in @tep is big endian. + * If @tep is NULL, false is returned. */ -bool tep_is_local_bigendian(struct tep_handle *pevent) +bool tep_is_local_bigendian(struct tep_handle *tep) { - if (pevent) - return pevent->host_bigendian == TEP_BIG_ENDIAN; + if (tep) + return (tep->host_bigendian == TEP_BIG_ENDIAN); return 0; } /** * tep_set_local_bigendian - set the stored local machine endian order - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @endian: non zero, if the local host has big endian order * * This sets the endian order for the local machine. */ -void tep_set_local_bigendian(struct tep_handle *pevent, enum tep_endian endian) +void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian) { - if (pevent) - pevent->host_bigendian = endian; + if (tep) + tep->host_bigendian = endian; } /** * tep_is_latency_format - get if the latency output format is configured - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns true if the latency output format is configured - * If @pevent is NULL, false is returned. + * If @tep is NULL, false is returned. 
*/ -bool tep_is_latency_format(struct tep_handle *pevent) +bool tep_is_latency_format(struct tep_handle *tep) { - if (pevent) - return pevent->latency_format; + if (tep) + return (tep->latency_format); return false; } /** * tep_set_latency_format - set the latency output format - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @lat: non zero for latency output format * * This sets the latency output format */ -void tep_set_latency_format(struct tep_handle *pevent, int lat) +void tep_set_latency_format(struct tep_handle *tep, int lat) { - if (pevent) - pevent->latency_format = lat; + if (tep) + tep->latency_format = lat; } /** diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index 35833ee32d6c3..09aa142f7fdd8 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -92,8 +92,8 @@ struct tep_handle { void tep_free_event(struct tep_event *event); void tep_free_format_field(struct tep_format_field *field); -unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data); -unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data); -unsigned long long tep_data2host8(struct tep_handle *pevent, unsigned long long data); +unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data); +unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data); +unsigned long long tep_data2host8(struct tep_handle *tep, unsigned long long data); #endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 8836702122beb..ceefa9b3d7265 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -200,13 +200,13 @@ static const char *find_cmdline(struct tep_handle *pevent, int pid) /** * tep_is_pid_registered - return if a pid has a cmdline registered - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @pid: The pid to check if it has a cmdline registered with. * * Returns true if the pid has a cmdline mapped to it * false otherwise. 
*/ -bool tep_is_pid_registered(struct tep_handle *pevent, int pid) +bool tep_is_pid_registered(struct tep_handle *tep, int pid) { const struct tep_cmdline *comm; struct tep_cmdline key; @@ -214,13 +214,13 @@ bool tep_is_pid_registered(struct tep_handle *pevent, int pid) if (!pid) return true; - if (!pevent->cmdlines && cmdline_init(pevent)) + if (!tep->cmdlines && cmdline_init(tep)) return false; key.pid = pid; - comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, - sizeof(*pevent->cmdlines), cmdline_cmp); + comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, + sizeof(*tep->cmdlines), cmdline_cmp); if (comm) return true; @@ -288,13 +288,13 @@ static int add_new_comm(struct tep_handle *pevent, return 0; } -static int _tep_register_comm(struct tep_handle *pevent, +static int _tep_register_comm(struct tep_handle *tep, const char *comm, int pid, bool override) { struct cmdline_list *item; - if (pevent->cmdlines) - return add_new_comm(pevent, comm, pid, override); + if (tep->cmdlines) + return add_new_comm(tep, comm, pid, override); item = malloc(sizeof(*item)); if (!item) @@ -309,17 +309,17 @@ static int _tep_register_comm(struct tep_handle *pevent, return -1; } item->pid = pid; - item->next = pevent->cmdlist; + item->next = tep->cmdlist; - pevent->cmdlist = item; - pevent->cmdline_count++; + tep->cmdlist = item; + tep->cmdline_count++; return 0; } /** * tep_register_comm - register a pid / comm mapping - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @comm: the command line to register * @pid: the pid to map the command line to * @@ -327,14 +327,14 @@ static int _tep_register_comm(struct tep_handle *pevent, * a given pid. The comm is duplicated. If a command with the same pid * already exist, -1 is returned and errno is set to EEXIST */ -int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) +int tep_register_comm(struct tep_handle *tep, const char *comm, int pid) { - return _tep_register_comm(pevent, comm, pid, false); + return _tep_register_comm(tep, comm, pid, false); } /** * tep_override_comm - register a pid / comm mapping - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @comm: the command line to register * @pid: the pid to map the command line to * @@ -342,19 +342,19 @@ int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) * a given pid. The comm is duplicated. If a command with the same pid * already exist, the command string is udapted with the new one */ -int tep_override_comm(struct tep_handle *pevent, const char *comm, int pid) +int tep_override_comm(struct tep_handle *tep, const char *comm, int pid) { - if (!pevent->cmdlines && cmdline_init(pevent)) { + if (!tep->cmdlines && cmdline_init(tep)) { errno = ENOMEM; return -1; } - return _tep_register_comm(pevent, comm, pid, true); + return _tep_register_comm(tep, comm, pid, true); } -int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock) +int tep_register_trace_clock(struct tep_handle *tep, const char *trace_clock) { - pevent->trace_clock = strdup(trace_clock); - if (!pevent->trace_clock) { + tep->trace_clock = strdup(trace_clock); + if (!tep->trace_clock) { errno = ENOMEM; return -1; } @@ -472,15 +472,14 @@ struct func_resolver { /** * tep_set_function_resolver - set an alternative function resolver - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @resolver: function to be used * @priv: resolver function private state. 
* * Some tools may have already a way to resolve kernel functions, allow them to - * keep using it instead of duplicating all the entries inside - * pevent->funclist. + * keep using it instead of duplicating all the entries inside tep->funclist. */ -int tep_set_function_resolver(struct tep_handle *pevent, +int tep_set_function_resolver(struct tep_handle *tep, tep_func_resolver_t *func, void *priv) { struct func_resolver *resolver = malloc(sizeof(*resolver)); @@ -491,23 +490,23 @@ int tep_set_function_resolver(struct tep_handle *pevent, resolver->func = func; resolver->priv = priv; - free(pevent->func_resolver); - pevent->func_resolver = resolver; + free(tep->func_resolver); + tep->func_resolver = resolver; return 0; } /** * tep_reset_function_resolver - reset alternative function resolver - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * * Stop using whatever alternative resolver was set, use the default * one instead. */ -void tep_reset_function_resolver(struct tep_handle *pevent) +void tep_reset_function_resolver(struct tep_handle *tep) { - free(pevent->func_resolver); - pevent->func_resolver = NULL; + free(tep->func_resolver); + tep->func_resolver = NULL; } static struct func_map * @@ -531,18 +530,18 @@ find_func(struct tep_handle *pevent, unsigned long long addr) /** * tep_find_function - find a function by a given address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @addr: the address to find the function with * * Returns a pointer to the function stored that has the given * address. Note, the address does not have to be exact, it * will select the function that would contain the address. */ -const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr) +const char *tep_find_function(struct tep_handle *tep, unsigned long long addr) { struct func_map *map; - map = find_func(pevent, addr); + map = find_func(tep, addr); if (!map) return NULL; @@ -551,7 +550,7 @@ const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr /** * tep_find_function_address - find a function address by a given address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @addr: the address to find the function with * * Returns the address the function starts at. This can be used in @@ -559,11 +558,11 @@ const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr * name and the function offset. */ unsigned long long -tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) +tep_find_function_address(struct tep_handle *tep, unsigned long long addr) { struct func_map *map; - map = find_func(pevent, addr); + map = find_func(tep, addr); if (!map) return 0; @@ -572,7 +571,7 @@ tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) /** * tep_register_function - register a function with a given address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @function: the function name to register * @addr: the address the function starts at * @mod: the kernel module the function may be in (NULL for none) @@ -580,7 +579,7 @@ tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) * This registers a function name with an address and module. * The @func passed in is duplicated. 
*/ -int tep_register_function(struct tep_handle *pevent, char *func, +int tep_register_function(struct tep_handle *tep, char *func, unsigned long long addr, char *mod) { struct func_list *item = malloc(sizeof(*item)); @@ -588,7 +587,7 @@ int tep_register_function(struct tep_handle *pevent, char *func, if (!item) return -1; - item->next = pevent->funclist; + item->next = tep->funclist; item->func = strdup(func); if (!item->func) goto out_free; @@ -601,8 +600,8 @@ int tep_register_function(struct tep_handle *pevent, char *func, item->mod = NULL; item->addr = addr; - pevent->funclist = item; - pevent->func_count++; + tep->funclist = item; + tep->func_count++; return 0; @@ -617,23 +616,23 @@ out_free: /** * tep_print_funcs - print out the stored functions - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * * This prints out the stored functions. */ -void tep_print_funcs(struct tep_handle *pevent) +void tep_print_funcs(struct tep_handle *tep) { int i; - if (!pevent->func_map) - func_map_init(pevent); + if (!tep->func_map) + func_map_init(tep); - for (i = 0; i < (int)pevent->func_count; i++) { + for (i = 0; i < (int)tep->func_count; i++) { printf("%016llx %s", - pevent->func_map[i].addr, - pevent->func_map[i].func); - if (pevent->func_map[i].mod) - printf(" [%s]\n", pevent->func_map[i].mod); + tep->func_map[i].addr, + tep->func_map[i].func); + if (tep->func_map[i].mod) + printf(" [%s]\n", tep->func_map[i].mod); else printf("\n"); } @@ -713,14 +712,14 @@ find_printk(struct tep_handle *pevent, unsigned long long addr) /** * tep_register_print_string - register a string by its address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @fmt: the string format to register * @addr: the address the string was located at * * This registers a string by the address it was stored in the kernel. * The @fmt passed in is duplicated. */ -int tep_register_print_string(struct tep_handle *pevent, const char *fmt, +int tep_register_print_string(struct tep_handle *tep, const char *fmt, unsigned long long addr) { struct printk_list *item = malloc(sizeof(*item)); @@ -729,7 +728,7 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt, if (!item) return -1; - item->next = pevent->printklist; + item->next = tep->printklist; item->addr = addr; /* Strip off quotes and '\n' from the end */ @@ -747,8 +746,8 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt, if (strcmp(p, "\\n") == 0) *p = 0; - pevent->printklist = item; - pevent->printk_count++; + tep->printklist = item; + tep->printk_count++; return 0; @@ -760,21 +759,21 @@ out_free: /** * tep_print_printk - print out the stored strings - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * * This prints the string formats that were stored. 
*/ -void tep_print_printk(struct tep_handle *pevent) +void tep_print_printk(struct tep_handle *tep) { int i; - if (!pevent->printk_map) - printk_map_init(pevent); + if (!tep->printk_map) + printk_map_init(tep); - for (i = 0; i < (int)pevent->printk_count; i++) { + for (i = 0; i < (int)tep->printk_count; i++) { printf("%016llx %s\n", - pevent->printk_map[i].addr, - pevent->printk_map[i].printk); + tep->printk_map[i].addr, + tep->printk_map[i].printk); } } @@ -1184,7 +1183,7 @@ static enum tep_event_type read_token(char **tok) } /** - * tep_read_token - access to utilities to use the pevent parser + * tep_read_token - access to utilities to use the tep parser * @tok: The token to return * * This will parse tokens from the string given by @@ -3357,14 +3356,14 @@ tep_find_any_field(struct tep_event *event, const char *name) /** * tep_read_number - read a number from data - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @ptr: the raw data * @size: the size of the data that holds the number * * Returns the number (converted to host) from the * raw data. */ -unsigned long long tep_read_number(struct tep_handle *pevent, +unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size) { unsigned long long val; @@ -3373,12 +3372,12 @@ unsigned long long tep_read_number(struct tep_handle *pevent, case 1: return *(unsigned char *)ptr; case 2: - return tep_data2host2(pevent, *(unsigned short *)ptr); + return tep_data2host2(tep, *(unsigned short *)ptr); case 4: - return tep_data2host4(pevent, *(unsigned int *)ptr); + return tep_data2host4(tep, *(unsigned int *)ptr); case 8: memcpy(&val, (ptr), sizeof(unsigned long long)); - return tep_data2host8(pevent, val); + return tep_data2host8(tep, val); default: /* BUG! */ return 0; @@ -3499,28 +3498,28 @@ static int events_id_cmp(const void *a, const void *b); /** * tep_find_event - find an event by given id - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @id: the id of the event * * Returns an event that has a given @id. */ -struct tep_event *tep_find_event(struct tep_handle *pevent, int id) +struct tep_event *tep_find_event(struct tep_handle *tep, int id) { struct tep_event **eventptr; struct tep_event key; struct tep_event *pkey = &key; /* Check cache first */ - if (pevent->last_event && pevent->last_event->id == id) - return pevent->last_event; + if (tep->last_event && tep->last_event->id == id) + return tep->last_event; key.id = id; - eventptr = bsearch(&pkey, pevent->events, pevent->nr_events, - sizeof(*pevent->events), events_id_cmp); + eventptr = bsearch(&pkey, tep->events, tep->nr_events, + sizeof(*tep->events), events_id_cmp); if (eventptr) { - pevent->last_event = *eventptr; + tep->last_event = *eventptr; return *eventptr; } @@ -3529,7 +3528,7 @@ struct tep_event *tep_find_event(struct tep_handle *pevent, int id) /** * tep_find_event_by_name - find an event by given name - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @sys: the system name to search for * @name: the name of the event to search for * @@ -3537,19 +3536,19 @@ struct tep_event *tep_find_event(struct tep_handle *pevent, int id) * @sys. If @sys is NULL the first event with @name is returned. 
*/ struct tep_event * -tep_find_event_by_name(struct tep_handle *pevent, +tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name) { struct tep_event *event = NULL; int i; - if (pevent->last_event && - strcmp(pevent->last_event->name, name) == 0 && - (!sys || strcmp(pevent->last_event->system, sys) == 0)) - return pevent->last_event; + if (tep->last_event && + strcmp(tep->last_event->name, name) == 0 && + (!sys || strcmp(tep->last_event->system, sys) == 0)) + return tep->last_event; - for (i = 0; i < pevent->nr_events; i++) { - event = pevent->events[i]; + for (i = 0; i < tep->nr_events; i++) { + event = tep->events[i]; if (strcmp(event->name, name) == 0) { if (!sys) break; @@ -3557,10 +3556,10 @@ tep_find_event_by_name(struct tep_handle *pevent, break; } } - if (i == pevent->nr_events) + if (i == tep->nr_events) event = NULL; - pevent->last_event = event; + tep->last_event = event; return event; } @@ -5173,7 +5172,7 @@ out_failed: /** * tep_data_latency_format - parse the data for the latency format - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @record: the record to read from * @@ -5181,7 +5180,7 @@ out_failed: * need rescheduling, in hard/soft interrupt, preempt count * and lock depth) and places it into the trace_seq. */ -void tep_data_latency_format(struct tep_handle *pevent, +void tep_data_latency_format(struct tep_handle *tep, struct trace_seq *s, struct tep_record *record) { static int check_lock_depth = 1; @@ -5196,13 +5195,13 @@ void tep_data_latency_format(struct tep_handle *pevent, int softirq; void *data = record->data; - lat_flags = parse_common_flags(pevent, data); - pc = parse_common_pc(pevent, data); + lat_flags = parse_common_flags(tep, data); + pc = parse_common_pc(tep, data); /* lock_depth may not always exist */ if (lock_depth_exists) - lock_depth = parse_common_lock_depth(pevent, data); + lock_depth = parse_common_lock_depth(tep, data); else if (check_lock_depth) { - lock_depth = parse_common_lock_depth(pevent, data); + lock_depth = parse_common_lock_depth(tep, data); if (lock_depth < 0) check_lock_depth = 0; else @@ -5211,9 +5210,9 @@ void tep_data_latency_format(struct tep_handle *pevent, /* migrate_disable may not always exist */ if (migrate_disable_exists) - migrate_disable = parse_common_migrate_disable(pevent, data); + migrate_disable = parse_common_migrate_disable(tep, data); else if (check_migrate_disable) { - migrate_disable = parse_common_migrate_disable(pevent, data); + migrate_disable = parse_common_migrate_disable(tep, data); if (migrate_disable < 0) check_migrate_disable = 0; else @@ -5256,67 +5255,67 @@ void tep_data_latency_format(struct tep_handle *pevent, /** * tep_data_type - parse out the given event type - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to read from * * This returns the event id from the @rec. */ -int tep_data_type(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_type(struct tep_handle *tep, struct tep_record *rec) { - return trace_parse_common_type(pevent, rec->data); + return trace_parse_common_type(tep, rec->data); } /** * tep_data_pid - parse the PID from record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to parse * * This returns the PID from a record. 
*/ -int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_pid(struct tep_handle *tep, struct tep_record *rec) { - return parse_common_pid(pevent, rec->data); + return parse_common_pid(tep, rec->data); } /** * tep_data_preempt_count - parse the preempt count from the record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to parse * * This returns the preempt count from a record. */ -int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec) { - return parse_common_pc(pevent, rec->data); + return parse_common_pc(tep, rec->data); } /** * tep_data_flags - parse the latency flags from the record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to parse * * This returns the latency flags from a record. * * Use trace_flag_type enum for the flags (see event-parse.h). */ -int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_flags(struct tep_handle *tep, struct tep_record *rec) { - return parse_common_flags(pevent, rec->data); + return parse_common_flags(tep, rec->data); } /** * tep_data_comm_from_pid - return the command line from PID - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @pid: the PID of the task to search for * * This returns a pointer to the command line that has the given * @pid. */ -const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid) +const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid) { const char *comm; - comm = find_cmdline(pevent, pid); + comm = find_cmdline(tep, pid); return comm; } @@ -5338,7 +5337,7 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct tep_cmdline /** * tep_data_pid_from_comm - return the pid from a given comm - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @comm: the cmdline to find the pid from * @next: the cmdline structure to find the next comm * @@ -5349,7 +5348,7 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct tep_cmdline * next pid. * Also, it does a linear search, so it may be slow. */ -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, +struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, struct tep_cmdline *next) { struct tep_cmdline *cmdline; @@ -5358,25 +5357,25 @@ struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char * If the cmdlines have not been converted yet, then use * the list. 
*/ - if (!pevent->cmdlines) - return pid_from_cmdlist(pevent, comm, next); + if (!tep->cmdlines) + return pid_from_cmdlist(tep, comm, next); if (next) { /* * The next pointer could have been still from * a previous call before cmdlines were created */ - if (next < pevent->cmdlines || - next >= pevent->cmdlines + pevent->cmdline_count) + if (next < tep->cmdlines || + next >= tep->cmdlines + tep->cmdline_count) next = NULL; else cmdline = next++; } if (!next) - cmdline = pevent->cmdlines; + cmdline = tep->cmdlines; - while (cmdline < pevent->cmdlines + pevent->cmdline_count) { + while (cmdline < tep->cmdlines + tep->cmdline_count) { if (strcmp(cmdline->comm, comm) == 0) return cmdline; cmdline++; @@ -5386,12 +5385,13 @@ struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char /** * tep_cmdline_pid - return the pid associated to a given cmdline + * @tep: a handle to the trace event parser context * @cmdline: The cmdline structure to get the pid from * * Returns the pid for a give cmdline. If @cmdline is NULL, then * -1 is returned. */ -int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline) +int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline) { struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; @@ -5402,9 +5402,9 @@ int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline) * If cmdlines have not been created yet, or cmdline is * not part of the array, then treat it as a cmdlist instead. */ - if (!pevent->cmdlines || - cmdline < pevent->cmdlines || - cmdline >= pevent->cmdlines + pevent->cmdline_count) + if (!tep->cmdlines || + cmdline < tep->cmdlines || + cmdline >= tep->cmdlines + tep->cmdline_count) return cmdlist->pid; return cmdline->pid; @@ -5455,14 +5455,14 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) /** * tep_find_event_by_record - return the event from a given record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @record: The record to get the event from * * Returns the associated event for a given record, or NULL if non is * is found. */ struct tep_event * -tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) +tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record) { int type; @@ -5471,21 +5471,21 @@ tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) return NULL; } - type = trace_parse_common_type(pevent, record->data); + type = trace_parse_common_type(tep, record->data); - return tep_find_event(pevent, type); + return tep_find_event(tep, type); } /** * tep_print_event_task - Write the event task comm, pid and CPU - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @event: the handle to the record's event * @record: The record to get the event from * * Writes the tasks comm, pid and CPU to @s. 
*/ -void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_task(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record) { @@ -5493,27 +5493,26 @@ void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, const char *comm; int pid; - pid = parse_common_pid(pevent, data); - comm = find_cmdline(pevent, pid); + pid = parse_common_pid(tep, data); + comm = find_cmdline(tep, pid); - if (pevent->latency_format) { - trace_seq_printf(s, "%8.8s-%-5d %3d", - comm, pid, record->cpu); - } else + if (tep->latency_format) + trace_seq_printf(s, "%8.8s-%-5d %3d", comm, pid, record->cpu); + else trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); } /** * tep_print_event_time - Write the event timestamp - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @event: the handle to the record's event * @record: The record to get the event from - * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * @use_trace_clock: Set to parse according to the @tep->trace_clock * * Writes the timestamp of the record into @s. */ -void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_time(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record, bool use_trace_clock) @@ -5524,19 +5523,18 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, int p; bool use_usec_format; - use_usec_format = is_timestamp_in_us(pevent->trace_clock, - use_trace_clock); + use_usec_format = is_timestamp_in_us(tep->trace_clock, use_trace_clock); if (use_usec_format) { secs = record->ts / NSEC_PER_SEC; nsecs = record->ts - secs * NSEC_PER_SEC; } - if (pevent->latency_format) { - tep_data_latency_format(pevent, s, record); + if (tep->latency_format) { + tep_data_latency_format(tep, s, record); } if (use_usec_format) { - if (pevent->flags & TEP_NSEC_OUTPUT) { + if (tep->flags & TEP_NSEC_OUTPUT) { usecs = nsecs; p = 9; } else { @@ -5556,14 +5554,14 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, /** * tep_print_event_data - Write the event data section - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @event: the handle to the record's event * @record: The record to get the event from * * Writes the parsing of the record's data to @s. */ -void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_data(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record) { @@ -5580,15 +5578,15 @@ void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, tep_event_info(s, event, record); } -void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event(struct tep_handle *tep, struct trace_seq *s, struct tep_record *record, bool use_trace_clock) { struct tep_event *event; - event = tep_find_event_by_record(pevent, record); + event = tep_find_event_by_record(tep, record); if (!event) { int i; - int type = trace_parse_common_type(pevent, record->data); + int type = trace_parse_common_type(tep, record->data); do_warning("ug! 
no event found for type %d", type); trace_seq_printf(s, "[UNKNOWN TYPE %d]", type); @@ -5598,9 +5596,9 @@ void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, return; } - tep_print_event_task(pevent, s, event, record); - tep_print_event_time(pevent, s, event, record, use_trace_clock); - tep_print_event_data(pevent, s, event, record); + tep_print_event_task(tep, s, event, record); + tep_print_event_time(tep, s, event, record, use_trace_clock); + tep_print_event_data(tep, s, event, record); } static int events_id_cmp(const void *a, const void *b) @@ -6017,7 +6015,7 @@ static void parse_header_field(const char *field, /** * tep_parse_header_page - parse the data stored in the header page - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @buf: the buffer storing the header page format string * @size: the size of @buf * @long_size: the long size to use if there is no header @@ -6027,7 +6025,7 @@ static void parse_header_field(const char *field, * * /sys/kernel/debug/tracing/events/header_page */ -int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size, +int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, int long_size) { int ignore; @@ -6037,22 +6035,22 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si * Old kernels did not have header page info. * Sorry but we just use what we find here in user space. */ - pevent->header_page_ts_size = sizeof(long long); - pevent->header_page_size_size = long_size; - pevent->header_page_data_offset = sizeof(long long) + long_size; - pevent->old_format = 1; + tep->header_page_ts_size = sizeof(long long); + tep->header_page_size_size = long_size; + tep->header_page_data_offset = sizeof(long long) + long_size; + tep->old_format = 1; return -1; } init_input_buf(buf, size); - parse_header_field("timestamp", &pevent->header_page_ts_offset, - &pevent->header_page_ts_size, 1); - parse_header_field("commit", &pevent->header_page_size_offset, - &pevent->header_page_size_size, 1); - parse_header_field("overwrite", &pevent->header_page_overwrite, + parse_header_field("timestamp", &tep->header_page_ts_offset, + &tep->header_page_ts_size, 1); + parse_header_field("commit", &tep->header_page_size_offset, + &tep->header_page_size_size, 1); + parse_header_field("overwrite", &tep->header_page_overwrite, &ignore, 0); - parse_header_field("data", &pevent->header_page_data_offset, - &pevent->header_page_data_size, 1); + parse_header_field("data", &tep->header_page_data_offset, + &tep->header_page_data_size, 1); return 0; } @@ -6258,7 +6256,7 @@ event_add_failed: /** * tep_parse_format - parse the event format - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @eventp: returned format * @buf: the buffer storing the event format string * @size: the size of @buf @@ -6271,17 +6269,17 @@ event_add_failed: * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno tep_parse_format(struct tep_handle *pevent, +enum tep_errno tep_parse_format(struct tep_handle *tep, struct tep_event **eventp, const char *buf, unsigned long size, const char *sys) { - return __parse_event(pevent, eventp, buf, size, sys); + return __parse_event(tep, eventp, buf, size, sys); } /** * tep_parse_event - parse the event format - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the 
event belongs to @@ -6293,11 +6291,11 @@ enum tep_errno tep_parse_format(struct tep_handle *pevent, * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, +enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, unsigned long size, const char *sys) { struct tep_event *event = NULL; - return __parse_event(pevent, &event, buf, size, sys); + return __parse_event(tep, &event, buf, size, sys); } int get_field_val(struct trace_seq *s, struct tep_format_field *field, @@ -6537,7 +6535,7 @@ static void free_func_handle(struct tep_function_handler *func) /** * tep_register_print_function - register a helper function - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @func: the function to process the helper function * @ret_type: the return type of the helper function * @name: the name of the helper function @@ -6550,7 +6548,7 @@ static void free_func_handle(struct tep_function_handler *func) * The @parameters is a variable list of tep_func_arg_type enums that * must end with TEP_FUNC_ARG_VOID. */ -int tep_register_print_function(struct tep_handle *pevent, +int tep_register_print_function(struct tep_handle *tep, tep_func_handler func, enum tep_func_arg_type ret_type, char *name, ...) @@ -6562,7 +6560,7 @@ int tep_register_print_function(struct tep_handle *pevent, va_list ap; int ret; - func_handle = find_func_handler(pevent, name); + func_handle = find_func_handler(tep, name); if (func_handle) { /* * This is most like caused by the users own @@ -6570,7 +6568,7 @@ int tep_register_print_function(struct tep_handle *pevent, * system defaults. */ pr_stat("override of function helper '%s'", name); - remove_func_handler(pevent, name); + remove_func_handler(tep, name); } func_handle = calloc(1, sizeof(*func_handle)); @@ -6617,8 +6615,8 @@ int tep_register_print_function(struct tep_handle *pevent, } va_end(ap); - func_handle->next = pevent->func_handlers; - pevent->func_handlers = func_handle; + func_handle->next = tep->func_handlers; + tep->func_handlers = func_handle; return 0; out_free: @@ -6629,7 +6627,7 @@ int tep_register_print_function(struct tep_handle *pevent, /** * tep_unregister_print_function - unregister a helper function - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @func: the function to process the helper function * @name: the name of the helper function * @@ -6637,14 +6635,14 @@ int tep_register_print_function(struct tep_handle *pevent, * * Returns 0 if the handler was removed successully, -1 otherwise. 
*/ -int tep_unregister_print_function(struct tep_handle *pevent, +int tep_unregister_print_function(struct tep_handle *tep, tep_func_handler func, char *name) { struct tep_function_handler *func_handle; - func_handle = find_func_handler(pevent, name); + func_handle = find_func_handler(tep, name); if (func_handle && func_handle->func == func) { - remove_func_handler(pevent, name); + remove_func_handler(tep, name); return 0; } return -1; @@ -6675,7 +6673,7 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, /** * tep_register_event_handler - register a way to parse an event - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @id: the id of the event to register * @sys_name: the system name the event belongs to * @event_name: the name of the event @@ -6696,14 +6694,14 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, * negative TEP_ERRNO_... in case of an error * */ -int tep_register_event_handler(struct tep_handle *pevent, int id, +int tep_register_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context) { struct tep_event *event; struct event_handler *handle; - event = search_event(pevent, id, sys_name, event_name); + event = search_event(tep, id, sys_name, event_name); if (event == NULL) goto not_found; @@ -6738,8 +6736,8 @@ int tep_register_event_handler(struct tep_handle *pevent, int id, } handle->func = func; - handle->next = pevent->handlers; - pevent->handlers = handle; + handle->next = tep->handlers; + tep->handlers = handle; handle->context = context; return TEP_REGISTER_SUCCESS; @@ -6766,7 +6764,7 @@ static int handle_matches(struct event_handler *handler, int id, /** * tep_unregister_event_handler - unregister an existing event handler - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @id: the id of the event to unregister * @sys_name: the system name the handler belongs to * @event_name: the name of the event handler @@ -6780,7 +6778,7 @@ static int handle_matches(struct event_handler *handler, int id, * * Returns 0 if handler was removed successfully, -1 if event was not found. 
*/ -int tep_unregister_event_handler(struct tep_handle *pevent, int id, +int tep_unregister_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context) { @@ -6788,7 +6786,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id, struct event_handler *handle; struct event_handler **next; - event = search_event(pevent, id, sys_name, event_name); + event = search_event(tep, id, sys_name, event_name); if (event == NULL) goto not_found; @@ -6802,7 +6800,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id, } not_found: - for (next = &pevent->handlers; *next; next = &(*next)->next) { + for (next = &tep->handlers; *next; next = &(*next)->next) { handle = *next; if (handle_matches(handle, id, sys_name, event_name, func, context)) @@ -6819,7 +6817,7 @@ not_found: } /** - * tep_alloc - create a pevent handle + * tep_alloc - create a tep handle */ struct tep_handle *tep_alloc(void) { @@ -6833,9 +6831,9 @@ struct tep_handle *tep_alloc(void) return pevent; } -void tep_ref(struct tep_handle *pevent) +void tep_ref(struct tep_handle *tep) { - pevent->ref_count++; + tep->ref_count++; } int tep_get_ref(struct tep_handle *tep) @@ -6885,10 +6883,10 @@ void tep_free_event(struct tep_event *event) } /** - * tep_free - free a pevent handle - * @pevent: the pevent handle to free + * tep_free - free a tep handle + * @tep: the tep handle to free */ -void tep_free(struct tep_handle *pevent) +void tep_free(struct tep_handle *tep) { struct cmdline_list *cmdlist, *cmdnext; struct func_list *funclist, *funcnext; @@ -6897,21 +6895,21 @@ void tep_free(struct tep_handle *pevent) struct event_handler *handle; int i; - if (!pevent) + if (!tep) return; - cmdlist = pevent->cmdlist; - funclist = pevent->funclist; - printklist = pevent->printklist; + cmdlist = tep->cmdlist; + funclist = tep->funclist; + printklist = tep->printklist; - pevent->ref_count--; - if (pevent->ref_count) + tep->ref_count--; + if (tep->ref_count) return; - if (pevent->cmdlines) { - for (i = 0; i < pevent->cmdline_count; i++) - free(pevent->cmdlines[i].comm); - free(pevent->cmdlines); + if (tep->cmdlines) { + for (i = 0; i < tep->cmdline_count; i++) + free(tep->cmdlines[i].comm); + free(tep->cmdlines); } while (cmdlist) { @@ -6921,12 +6919,12 @@ void tep_free(struct tep_handle *pevent) cmdlist = cmdnext; } - if (pevent->func_map) { - for (i = 0; i < (int)pevent->func_count; i++) { - free(pevent->func_map[i].func); - free(pevent->func_map[i].mod); + if (tep->func_map) { + for (i = 0; i < (int)tep->func_count; i++) { + free(tep->func_map[i].func); + free(tep->func_map[i].mod); } - free(pevent->func_map); + free(tep->func_map); } while (funclist) { @@ -6937,16 +6935,16 @@ void tep_free(struct tep_handle *pevent) funclist = funcnext; } - while (pevent->func_handlers) { - func_handler = pevent->func_handlers; - pevent->func_handlers = func_handler->next; + while (tep->func_handlers) { + func_handler = tep->func_handlers; + tep->func_handlers = func_handler->next; free_func_handle(func_handler); } - if (pevent->printk_map) { - for (i = 0; i < (int)pevent->printk_count; i++) - free(pevent->printk_map[i].printk); - free(pevent->printk_map); + if (tep->printk_map) { + for (i = 0; i < (int)tep->printk_count; i++) + free(tep->printk_map[i].printk); + free(tep->printk_map); } while (printklist) { @@ -6956,24 +6954,24 @@ void tep_free(struct tep_handle *pevent) printklist = printknext; } - for (i = 0; i < pevent->nr_events; i++) - 
tep_free_event(pevent->events[i]); + for (i = 0; i < tep->nr_events; i++) + tep_free_event(tep->events[i]); - while (pevent->handlers) { - handle = pevent->handlers; - pevent->handlers = handle->next; + while (tep->handlers) { + handle = tep->handlers; + tep->handlers = handle->next; free_handler(handle); } - free(pevent->trace_clock); - free(pevent->events); - free(pevent->sort_events); - free(pevent->func_resolver); + free(tep->trace_clock); + free(tep->events); + free(tep->sort_events); + free(tep->func_resolver); - free(pevent); + free(tep); } -void tep_unref(struct tep_handle *pevent) +void tep_unref(struct tep_handle *tep) { - tep_free(pevent); + tep_free(tep); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index d473dc5579781..2720a30ef86ba 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -64,8 +64,8 @@ typedef int (*tep_event_handler_func)(struct trace_seq *s, struct tep_event *event, void *context); -typedef int (*tep_plugin_load_func)(struct tep_handle *pevent); -typedef int (*tep_plugin_unload_func)(struct tep_handle *pevent); +typedef int (*tep_plugin_load_func)(struct tep_handle *tep); +typedef int (*tep_plugin_unload_func)(struct tep_handle *tep); struct tep_plugin_option { struct tep_plugin_option *next; @@ -85,12 +85,12 @@ struct tep_plugin_option { * TEP_PLUGIN_LOADER: (required) * The function name to initialized the plugin. * - * int TEP_PLUGIN_LOADER(struct tep_handle *pevent) + * int TEP_PLUGIN_LOADER(struct tep_handle *tep) * * TEP_PLUGIN_UNLOADER: (optional) * The function called just before unloading * - * int TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) + * int TEP_PLUGIN_UNLOADER(struct tep_handle *tep) * * TEP_PLUGIN_OPTIONS: (optional) * Plugin options that can be set before loading @@ -393,9 +393,9 @@ struct tep_plugin_list; #define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) -struct tep_plugin_list *tep_load_plugins(struct tep_handle *pevent); +struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); void tep_unload_plugins(struct tep_plugin_list *plugin_list, - struct tep_handle *pevent); + struct tep_handle *tep); char **tep_plugin_list_options(void); void tep_plugin_free_options_list(char **list); int tep_plugin_add_options(const char *name, @@ -410,7 +410,7 @@ typedef char *(tep_func_resolver_t)(void *priv, unsigned long long *addrp, char **modp); void tep_set_flag(struct tep_handle *tep, int flag); void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); -bool tep_check_flags(struct tep_handle *tep, enum tep_flag flags); +bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags); static inline int tep_is_bigendian(void) { @@ -430,37 +430,37 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, }; -int tep_set_function_resolver(struct tep_handle *pevent, +int tep_set_function_resolver(struct tep_handle *tep, tep_func_resolver_t *func, void *priv); -void tep_reset_function_resolver(struct tep_handle *pevent); -int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid); -int tep_override_comm(struct tep_handle *pevent, const char *comm, int pid); -int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock); -int tep_register_function(struct tep_handle *pevent, char *name, +void tep_reset_function_resolver(struct tep_handle *tep); +int tep_register_comm(struct tep_handle *tep, const char *comm, int pid); +int tep_override_comm(struct tep_handle *tep, const char *comm, int pid); +int 
tep_register_trace_clock(struct tep_handle *tep, const char *trace_clock); +int tep_register_function(struct tep_handle *tep, char *name, unsigned long long addr, char *mod); -int tep_register_print_string(struct tep_handle *pevent, const char *fmt, +int tep_register_print_string(struct tep_handle *tep, const char *fmt, unsigned long long addr); -bool tep_is_pid_registered(struct tep_handle *pevent, int pid); +bool tep_is_pid_registered(struct tep_handle *tep, int pid); -void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_task(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record); -void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_time(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record, bool use_trace_clock); -void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_data(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record); -void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event(struct tep_handle *tep, struct trace_seq *s, struct tep_record *record, bool use_trace_clock); -int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size, +int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, int long_size); -enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, +enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, unsigned long size, const char *sys); -enum tep_errno tep_parse_format(struct tep_handle *pevent, +enum tep_errno tep_parse_format(struct tep_handle *tep, struct tep_event **eventp, const char *buf, unsigned long size, const char *sys); @@ -492,50 +492,50 @@ enum tep_reg_handler { TEP_REGISTER_SUCCESS_OVERWRITE, }; -int tep_register_event_handler(struct tep_handle *pevent, int id, +int tep_register_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context); -int tep_unregister_event_handler(struct tep_handle *pevent, int id, +int tep_unregister_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context); -int tep_register_print_function(struct tep_handle *pevent, +int tep_register_print_function(struct tep_handle *tep, tep_func_handler func, enum tep_func_arg_type ret_type, char *name, ...); -int tep_unregister_print_function(struct tep_handle *pevent, +int tep_unregister_print_function(struct tep_handle *tep, tep_func_handler func, char *name); struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name); struct tep_format_field *tep_find_field(struct tep_event *event, const char *name); struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name); -const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr); +const char *tep_find_function(struct tep_handle *tep, unsigned long long addr); unsigned long long -tep_find_function_address(struct tep_handle *pevent, unsigned long long addr); -unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size); +tep_find_function_address(struct tep_handle *tep, unsigned long long addr); +unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size); int tep_read_number_field(struct 
tep_format_field *field, const void *data, unsigned long long *value); struct tep_event *tep_get_first_event(struct tep_handle *tep); int tep_get_events_count(struct tep_handle *tep); -struct tep_event *tep_find_event(struct tep_handle *pevent, int id); +struct tep_event *tep_find_event(struct tep_handle *tep, int id); struct tep_event * -tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name); +tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name); struct tep_event * -tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record); +tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record); -void tep_data_latency_format(struct tep_handle *pevent, +void tep_data_latency_format(struct tep_handle *tep, struct trace_seq *s, struct tep_record *record); -int tep_data_type(struct tep_handle *pevent, struct tep_record *rec); -int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec); -int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec); -int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec); -const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid); +int tep_data_type(struct tep_handle *tep, struct tep_record *rec); +int tep_data_pid(struct tep_handle *tep, struct tep_record *rec); +int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec); +int tep_data_flags(struct tep_handle *tep, struct tep_record *rec); +const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid); struct tep_cmdline; -struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, +struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, struct tep_cmdline *next); -int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline); +int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline); void tep_print_field(struct trace_seq *s, void *data, struct tep_format_field *field); @@ -543,11 +543,10 @@ void tep_print_fields(struct trace_seq *s, void *data, int size __maybe_unused, struct tep_event *event); void tep_event_info(struct trace_seq *s, struct tep_event *event, struct tep_record *record); -int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum, +int tep_strerror(struct tep_handle *tep, enum tep_errno errnum, char *buf, size_t buflen); -struct tep_event **tep_list_events(struct tep_handle *tep, - enum tep_event_sort_type); +struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type); struct tep_event **tep_list_events_copy(struct tep_handle *tep, enum tep_event_sort_type); struct tep_format_field **tep_event_common_fields(struct tep_event *event); @@ -557,28 +556,28 @@ enum tep_endian { TEP_LITTLE_ENDIAN = 0, TEP_BIG_ENDIAN }; -int tep_get_cpus(struct tep_handle *pevent); -void tep_set_cpus(struct tep_handle *pevent, int cpus); -int tep_get_long_size(struct tep_handle *pevent); -void tep_set_long_size(struct tep_handle *pevent, int long_size); -int tep_get_page_size(struct tep_handle *pevent); -void tep_set_page_size(struct tep_handle *pevent, int _page_size); -bool tep_is_file_bigendian(struct tep_handle *pevent); -void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian); -bool tep_is_local_bigendian(struct tep_handle *pevent); -void tep_set_local_bigendian(struct tep_handle *pevent, enum tep_endian endian); -bool tep_is_latency_format(struct tep_handle *pevent); -void tep_set_latency_format(struct tep_handle 
*pevent, int lat); -int tep_get_header_page_size(struct tep_handle *pevent); +int tep_get_cpus(struct tep_handle *tep); +void tep_set_cpus(struct tep_handle *tep, int cpus); +int tep_get_long_size(struct tep_handle *tep); +void tep_set_long_size(struct tep_handle *tep, int long_size); +int tep_get_page_size(struct tep_handle *tep); +void tep_set_page_size(struct tep_handle *tep, int _page_size); +bool tep_is_file_bigendian(struct tep_handle *tep); +void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian); +bool tep_is_local_bigendian(struct tep_handle *tep); +void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian); +bool tep_is_latency_format(struct tep_handle *tep); +void tep_set_latency_format(struct tep_handle *tep, int lat); +int tep_get_header_page_size(struct tep_handle *tep); int tep_get_header_timestamp_size(struct tep_handle *tep); bool tep_is_old_format(struct tep_handle *tep); void tep_set_print_raw(struct tep_handle *tep, int print_raw); void tep_set_test_filters(struct tep_handle *tep, int test_filters); struct tep_handle *tep_alloc(void); -void tep_free(struct tep_handle *pevent); -void tep_ref(struct tep_handle *pevent); -void tep_unref(struct tep_handle *pevent); +void tep_free(struct tep_handle *tep); +void tep_ref(struct tep_handle *tep); +void tep_unref(struct tep_handle *tep); int tep_get_ref(struct tep_handle *tep); /* access to the internal parser */ @@ -590,8 +589,8 @@ const char *tep_get_input_buf(void); unsigned long long tep_get_input_buf_ptr(void); /* for debugging */ -void tep_print_funcs(struct tep_handle *pevent); -void tep_print_printk(struct tep_handle *pevent); +void tep_print_funcs(struct tep_handle *tep); +void tep_print_printk(struct tep_handle *tep); /* ----------------------- filtering ----------------------- */ @@ -724,7 +723,7 @@ struct tep_event_filter { char error_buffer[TEP_FILTER_ERROR_BUFSZ]; }; -struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent); +struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep); /* for backward compatibility */ #define FILTER_NONE TEP_ERRNO__NO_FILTER diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index e74f16c88398f..d0488de0174ab 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -419,16 +419,16 @@ load_plugins(struct tep_handle *pevent, const char *suffix, } struct tep_plugin_list* -tep_load_plugins(struct tep_handle *pevent) +tep_load_plugins(struct tep_handle *tep) { struct tep_plugin_list *list = NULL; - load_plugins(pevent, ".so", load_plugin, &list); + load_plugins(tep, ".so", load_plugin, &list); return list; } void -tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *pevent) +tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep) { tep_plugin_unload_func func; struct tep_plugin_list *list; @@ -438,7 +438,7 @@ tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *peven plugin_list = list->next; func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME); if (func) - func(pevent); + func(tep); dlclose(list->handle); free(list->name); free(list); diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 3320c0a0e343a..5d8f9d9bc253d 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -164,9 +164,9 @@ add_filter_type(struct tep_event_filter *filter, int id) /** * tep_filter_alloc - create a new event filter - * @pevent: The pevent 
that this filter is associated with + * @tep: The tep that this filter is associated with */ -struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent) +struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep) { struct tep_event_filter *filter; @@ -175,8 +175,8 @@ struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent) return NULL; memset(filter, 0, sizeof(*filter)); - filter->pevent = pevent; - tep_ref(pevent); + filter->pevent = tep; + tep_ref(tep); return filter; } diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c index a51b366f47dad..3d43b56a6c984 100644 --- a/tools/lib/traceevent/plugin_cfg80211.c +++ b/tools/lib/traceevent/plugin_cfg80211.c @@ -25,9 +25,9 @@ process___le16_to_cpup(struct trace_seq *s, unsigned long long *args) return val ? (long long) le16toh(*val) : 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process___le16_to_cpup, TEP_FUNC_ARG_INT, "__le16_to_cpup", @@ -36,8 +36,8 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process___le16_to_cpup, + tep_unregister_print_function(tep, process___le16_to_cpup, "__le16_to_cpup"); } diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c index a73eca34a8f96..e93f33bd705f8 100644 --- a/tools/lib/traceevent/plugin_function.c +++ b/tools/lib/traceevent/plugin_function.c @@ -164,9 +164,9 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "ftrace", "function", + tep_register_event_handler(tep, -1, "ftrace", "function", function_handler, NULL); tep_plugin_add_options("ftrace", plugin_options); @@ -174,11 +174,11 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { int i, x; - tep_unregister_event_handler(pevent, -1, "ftrace", "function", + tep_unregister_event_handler(tep, -1, "ftrace", "function", function_handler, NULL); for (i = 0; i <= cpus; i++) { diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c index 5db5e401275ff..bb434e0ed03ab 100644 --- a/tools/lib/traceevent/plugin_hrtimer.c +++ b/tools/lib/traceevent/plugin_hrtimer.c @@ -67,23 +67,23 @@ static int timer_start_handler(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, + tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry", timer_expire_handler, NULL); - tep_register_event_handler(pevent, -1, "timer", "hrtimer_start", + tep_register_event_handler(tep, -1, "timer", "hrtimer_start", timer_start_handler, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, + tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry", timer_expire_handler, NULL); - tep_unregister_event_handler(pevent, -1, "timer", "hrtimer_start", + tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start", 
timer_start_handler, NULL); } diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c index a5e34135dd6a2..04fc125f38cb3 100644 --- a/tools/lib/traceevent/plugin_jbd2.c +++ b/tools/lib/traceevent/plugin_jbd2.c @@ -48,16 +48,16 @@ process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args) return jiffies; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process_jbd2_dev_to_name, TEP_FUNC_ARG_STRING, "jbd2_dev_to_name", TEP_FUNC_ARG_INT, TEP_FUNC_ARG_VOID); - tep_register_print_function(pevent, + tep_register_print_function(tep, process_jiffies_to_msecs, TEP_FUNC_ARG_LONG, "jiffies_to_msecs", @@ -66,11 +66,11 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process_jbd2_dev_to_name, + tep_unregister_print_function(tep, process_jbd2_dev_to_name, "jbd2_dev_to_name"); - tep_unregister_print_function(pevent, process_jiffies_to_msecs, + tep_unregister_print_function(tep, process_jiffies_to_msecs, "jiffies_to_msecs"); } diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c index 0e3c601f9ed19..c09bafdd83775 100644 --- a/tools/lib/traceevent/plugin_kmem.c +++ b/tools/lib/traceevent/plugin_kmem.c @@ -49,47 +49,47 @@ static int call_site_handler(struct trace_seq *s, struct tep_record *record, return 1; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "kmem", "kfree", + tep_register_event_handler(tep, -1, "kmem", "kfree", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmalloc", + tep_register_event_handler(tep, -1, "kmem", "kmalloc", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmalloc_node", + tep_register_event_handler(tep, -1, "kmem", "kmalloc_node", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", + tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc_node", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_free", + tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free", call_site_handler, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "kmem", "kfree", + tep_unregister_event_handler(tep, -1, "kmem", "kfree", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc", + tep_unregister_event_handler(tep, -1, "kmem", "kmalloc", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node", + tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", + tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc_node", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free", + tep_unregister_event_handler(tep, 
-1, "kmem", "kmem_cache_free", call_site_handler, NULL); } diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c index 688e5d97d7a73..472298c2b531e 100644 --- a/tools/lib/traceevent/plugin_kvm.c +++ b/tools/lib/traceevent/plugin_kvm.c @@ -445,40 +445,40 @@ process_is_writable_pte(struct trace_seq *s, unsigned long long *args) return pte & PT_WRITABLE_MASK; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { init_disassembler(); - tep_register_event_handler(pevent, -1, "kvm", "kvm_exit", + tep_register_event_handler(tep, -1, "kvm", "kvm_exit", kvm_exit_handler, NULL); - tep_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn", kvm_emulate_insn_handler, NULL); - tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit", + tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit", kvm_nested_vmexit_handler, NULL); - tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject", + tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject", kvm_nested_vmexit_inject_handler, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page", kvm_mmu_get_page_handler, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page", kvm_mmu_print_role, NULL); - tep_register_event_handler(pevent, -1, + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_unsync_page", kvm_mmu_print_role, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page", kvm_mmu_print_role, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, NULL); - tep_register_print_function(pevent, + tep_register_print_function(tep, process_is_writable_pte, TEP_FUNC_ARG_INT, "is_writable_pte", @@ -487,37 +487,37 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_exit", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit", kvm_exit_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn", kvm_emulate_insn_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit", kvm_nested_vmexit_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject", kvm_nested_vmexit_inject_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page", kvm_mmu_get_page_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page", kvm_mmu_print_role, NULL); - tep_unregister_event_handler(pevent, -1, + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_unsync_page", kvm_mmu_print_role, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + 
tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page", kvm_mmu_print_role, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, NULL); - tep_unregister_print_function(pevent, process_is_writable_pte, + tep_unregister_print_function(tep, process_is_writable_pte, "is_writable_pte"); } diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c index e38b9477aad22..884303c26b5cd 100644 --- a/tools/lib/traceevent/plugin_mac80211.c +++ b/tools/lib/traceevent/plugin_mac80211.c @@ -87,17 +87,17 @@ static int drv_bss_info_changed(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "mac80211", + tep_register_event_handler(tep, -1, "mac80211", "drv_bss_info_changed", drv_bss_info_changed, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "mac80211", + tep_unregister_event_handler(tep, -1, "mac80211", "drv_bss_info_changed", drv_bss_info_changed, NULL); } diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index 834c9e378ff85..701c22913cf87 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -135,27 +135,27 @@ static int sched_switch_handler(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "sched", "sched_switch", + tep_register_event_handler(tep, -1, "sched", "sched_switch", sched_switch_handler, NULL); - tep_register_event_handler(pevent, -1, "sched", "sched_wakeup", + tep_register_event_handler(tep, -1, "sched", "sched_wakeup", sched_wakeup_handler, NULL); - tep_register_event_handler(pevent, -1, "sched", "sched_wakeup_new", + tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new", sched_wakeup_handler, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "sched", "sched_switch", + tep_unregister_event_handler(tep, -1, "sched", "sched_switch", sched_switch_handler, NULL); - tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup", + tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup", sched_wakeup_handler, NULL); - tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new", + tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new", sched_wakeup_handler, NULL); } diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c index 4eba25cc14318..5d0387a4b65a0 100644 --- a/tools/lib/traceevent/plugin_scsi.c +++ b/tools/lib/traceevent/plugin_scsi.c @@ -414,9 +414,9 @@ unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process_scsi_trace_parse_cdb, TEP_FUNC_ARG_STRING, "scsi_trace_parse_cdb", @@ -427,8 +427,8 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, 
process_scsi_trace_parse_cdb, + tep_unregister_print_function(tep, process_scsi_trace_parse_cdb, "scsi_trace_parse_cdb"); } diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c index bc0496e4c296f..993b208d0323f 100644 --- a/tools/lib/traceevent/plugin_xen.c +++ b/tools/lib/traceevent/plugin_xen.c @@ -120,9 +120,9 @@ unsigned long long process_xen_hypercall_name(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process_xen_hypercall_name, TEP_FUNC_ARG_STRING, "xen_hypercall_name", @@ -131,8 +131,8 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process_xen_hypercall_name, + tep_unregister_print_function(tep, process_xen_hypercall_name, "xen_hypercall_name"); } -- GitLab From 69769ce159cbfd7567815a86cfc3ea63423de61b Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:18 -0400 Subject: [PATCH 0428/1861] perf tools, tools lib traceevent: Rename "pevent" member of struct tep_event to "tep" The member "pevent" of the struct tep_event is renamed to "tep". This makes the struct consistent with the chosen naming convention: tep (trace event parser), instead of the old pevent. Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190401132111.13727-3-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164344.627724996@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 32 +++++++++---------- tools/lib/traceevent/event-parse.h | 2 +- tools/lib/traceevent/parse-filter.c | 6 ++-- tools/lib/traceevent/plugin_function.c | 2 +- tools/lib/traceevent/plugin_kmem.c | 4 +-- tools/lib/traceevent/plugin_kvm.c | 4 +-- tools/lib/traceevent/plugin_sched_switch.c | 2 +- tools/perf/builtin-kmem.c | 2 +- tools/perf/util/data-convert-bt.c | 4 +-- tools/perf/util/python.c | 2 +- .../util/scripting-engines/trace-event-perl.c | 2 +- .../scripting-engines/trace-event-python.c | 2 +- tools/perf/util/trace-event-parse.c | 2 +- 13 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ceefa9b3d7265..70ea638156f2e 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -804,7 +804,7 @@ static int add_event(struct tep_handle *pevent, struct tep_event *event) pevent->events[i] = event; pevent->nr_events++; - event->pevent = pevent; + event->tep = pevent; return 0; } @@ -1656,8 +1656,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** else if (field->flags & TEP_FIELD_IS_STRING) field->elementsize = 1; else if (field->flags & TEP_FIELD_IS_LONG) - field->elementsize = event->pevent ? - event->pevent->long_size : + field->elementsize = event->tep ? 
+ event->tep->long_size : sizeof(long); } else field->elementsize = field->size; @@ -3075,7 +3075,7 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, return process_dynamic_array_len(event, arg, tok); } - func = find_func_handler(event->pevent, token); + func = find_func_handler(event->tep, token); if (func) { free_token(token); return process_func_handler(event, func, arg, tok); @@ -3405,7 +3405,7 @@ int tep_read_number_field(struct tep_format_field *field, const void *data, case 2: case 4: case 8: - *value = tep_read_number(field->event->pevent, + *value = tep_read_number(field->event->tep, data + field->offset, field->size); return 0; default: @@ -3566,7 +3566,7 @@ tep_find_event_by_name(struct tep_handle *tep, static unsigned long long eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; unsigned long long val = 0; unsigned long long left, right; struct tep_print_arg *typearg = NULL; @@ -3907,7 +3907,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct tep_event *event, const char *format, int len_arg, struct tep_print_arg *arg) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; struct tep_print_flag_sym *flag; struct tep_format_field *field; struct printk_map *printk; @@ -4256,7 +4256,7 @@ static void free_args(struct tep_print_arg *args) static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; struct tep_format_field *field, *ip_field; struct tep_print_arg *args, *arg, **next; unsigned long long ip, val; @@ -4434,7 +4434,7 @@ static char * get_bprint_format(void *data, int size __maybe_unused, struct tep_event *event) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; unsigned long long addr; struct tep_format_field *field; struct printk_map *printk; @@ -4835,7 +4835,7 @@ void tep_print_field(struct trace_seq *s, void *data, { unsigned long long val; unsigned int offset, len, i; - struct tep_handle *pevent = field->event->pevent; + struct tep_handle *pevent = field->event->tep; if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; @@ -4910,7 +4910,7 @@ void tep_print_fields(struct trace_seq *s, void *data, static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; struct tep_print_fmt *print_fmt = &event->print_fmt; struct tep_print_arg *arg = print_fmt->args; struct tep_print_arg *args = NULL; @@ -5424,7 +5424,7 @@ void tep_event_info(struct trace_seq *s, struct tep_event *event, { int print_pretty = 1; - if (event->pevent->print_raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) + if (event->tep->print_raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) tep_print_fields(s, record->data, record->size, event); else { @@ -6163,7 +6163,7 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, } /* Add pevent to event so that it can be referenced */ - event->pevent = pevent; + event->tep = pevent; ret = event_read_format(event); if (ret < 0) { @@ -6357,8 +6357,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, offset = field->offset; if (field->flags & TEP_FIELD_IS_DYNAMIC) { - offset = tep_read_number(event->pevent, - data + offset, field->size); + 
offset = tep_read_number(event->tep, + data + offset, field->size); *len = offset >> 16; offset &= 0xffff; } else @@ -6492,7 +6492,7 @@ int tep_print_func_field(struct trace_seq *s, const char *fmt, struct tep_record *record, int err) { struct tep_format_field *field = tep_find_field(event, name); - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; unsigned long long val; struct func_map *func; char tmp[128]; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 2720a30ef86ba..de7ace173e756 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -278,7 +278,7 @@ struct tep_print_fmt { }; struct tep_event { - struct tep_handle *pevent; + struct tep_handle *tep; char *name; int id; int flags; diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 5d8f9d9bc253d..1233582092977 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1528,8 +1528,8 @@ get_comm(struct tep_event *event, struct tep_record *record) const char *comm; int pid; - pid = tep_data_pid(event->pevent, record); - comm = tep_data_comm_from_pid(event->pevent, pid); + pid = tep_data_pid(event->tep, record); + comm = tep_data_comm_from_pid(event->tep, pid); return comm; } @@ -1727,7 +1727,7 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * } else { event = arg->str.field->event; - pevent = event->pevent; + pevent = event->tep; addr = get_value(event, arg->str.field, record); if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG)) diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c index e93f33bd705f8..d6a8df0606141 100644 --- a/tools/lib/traceevent/plugin_function.c +++ b/tools/lib/traceevent/plugin_function.c @@ -126,7 +126,7 @@ static int add_and_get_index(const char *parent, const char *child, int cpu) static int function_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *pevent = event->tep; unsigned long long function; unsigned long long pfunction; const char *func; diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c index c09bafdd83775..edaec5d962c3d 100644 --- a/tools/lib/traceevent/plugin_kmem.c +++ b/tools/lib/traceevent/plugin_kmem.c @@ -39,11 +39,11 @@ static int call_site_handler(struct trace_seq *s, struct tep_record *record, if (tep_read_number_field(field, data, &val)) return 1; - func = tep_find_function(event->pevent, val); + func = tep_find_function(event->tep, val); if (!func) return 1; - addr = tep_find_function_address(event->pevent, val); + addr = tep_find_function_address(event->tep, val); trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr)); return 1; diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c index 472298c2b531e..c8e623065a7e4 100644 --- a/tools/lib/traceevent/plugin_kvm.c +++ b/tools/lib/traceevent/plugin_kvm.c @@ -389,8 +389,8 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, * We can only use the structure if file is of the same * endianness. 
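A hedged aside on the check above: a numeric read through tep_read_number_field() is already endian-safe, but reinterpreting the raw value as a bit-field structure, as kvm_mmu_print_role() does with its role union, is only valid when the trace file and the host share byte order. A minimal sketch of that guard in a hypothetical handler; the "role" field name and its bit layout are invented for illustration, and everything else uses only functions declared in event-parse.h:

	static int my_role_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
	{
		union {
			unsigned long long raw;
			struct { unsigned level:4; unsigned quadrant:2; } f; /* made up */
		} role;
		struct tep_format_field *field = tep_find_any_field(event, "role");

		if (!field || tep_read_number_field(field, record->data, &role.raw))
			return 1;	/* let the default printer handle it */

		/* Bit-field layout only lines up when endianness matches. */
		if (tep_is_file_bigendian(event->tep) ==
		    tep_is_local_bigendian(event->tep))
			trace_seq_printf(s, "level %u q%u",
					 role.f.level, role.f.quadrant);
		else	/* layouts differ; fall back to the raw bits */
			trace_seq_printf(s, "role %llx", role.raw);
		return 0;
	}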
*/ - if (tep_is_file_bigendian(event->pevent) == - tep_is_local_bigendian(event->pevent)) { + if (tep_is_file_bigendian(event->tep) == + tep_is_local_bigendian(event->tep)) { trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s", role.level, diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index 701c22913cf87..957389a0ff7ad 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -62,7 +62,7 @@ static void write_and_save_comm(struct tep_format_field *field, comm = &s->buffer[len]; /* Help out the comm to ids. This will handle dups */ - tep_register_comm(field->event->pevent, comm, pid); + tep_register_comm(field->event->tep, comm, pid); } static int sched_wakeup_handler(struct trace_seq *s, diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fa520f4b8095a..b80eee4551111 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1975,7 +1975,7 @@ int cmd_kmem(int argc, const char **argv) goto out_delete; } - kmem_page_size = tep_get_page_size(evsel->tp_format->pevent); + kmem_page_size = tep_get_page_size(evsel->tp_format->tep); symbol_conf.use_callchain = true; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 26af43ad9ddd3..e0311c9750ad5 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -310,7 +310,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, if (flags & TEP_FIELD_IS_DYNAMIC) { unsigned long long tmp_val; - tmp_val = tep_read_number(fmtf->event->pevent, + tmp_val = tep_read_number(fmtf->event->tep, data + offset, len); offset = tmp_val; len = offset >> 16; @@ -354,7 +354,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, unsigned long long value_int; value_int = tep_read_number( - fmtf->event->pevent, + fmtf->event->tep, data + offset + i * len, len); if (!(flags & TEP_FIELD_IS_SIGNED)) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index dda0ac978b1eb..6aa7e2352e16e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -342,7 +342,7 @@ static bool is_tracepoint(struct pyrf_event *pevent) static PyObject* tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) { - struct tep_handle *pevent = field->event->pevent; + struct tep_handle *pevent = field->event->tep; void *data = pe->sample.raw_data; PyObject *ret = NULL; unsigned long long val; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 5f06378a482b8..61aa7f3df915b 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -372,7 +372,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, ns = nsecs - s * NSEC_PER_SEC; scripting_context->event_data = data; - scripting_context->pevent = evsel->tp_format->pevent; + scripting_context->pevent = evsel->tp_format->tep; ENTER; SAVETMPS; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 09604c6508f04..22f52b6698719 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -837,7 +837,7 @@ static void python_process_tracepoint(struct perf_sample *sample, ns = nsecs - s * NSEC_PER_SEC; scripting_context->event_data = data; - scripting_context->pevent = evsel->tp_format->pevent; + 
scripting_context->pevent = evsel->tp_format->tep; context = _PyCapsule_New(scripting_context, NULL, NULL); diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index ad74be1f0e420..863955e4094e2 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -111,7 +111,7 @@ raw_field_value(struct tep_event *event, const char *name, void *data) unsigned long long read_size(struct tep_event *event, void *ptr, int size) { - return tep_read_number(event->pevent, ptr, size); + return tep_read_number(event->tep, ptr, size); } void event_format__fprintf(struct tep_event *event, -- GitLab From 6b1f4c426a60387f1291d2ba3838c7b0914be12f Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:19 -0400 Subject: [PATCH 0429/1861] perf tools, tools lib traceevent: Rename "pevent" member of struct tep_event_filter to "tep" The member "pevent" of the struct tep_event_filter is renamed to "tep". This makes the struct consistent with the chosen naming convention: tep (trace event parser), instead of the old pevent. Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190401132111.13727-4-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164344.785896189@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.h | 2 +- tools/lib/traceevent/parse-filter.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index de7ace173e756..642f68ab5fb2b 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -717,7 +717,7 @@ struct tep_filter_type { #define TEP_FILTER_ERROR_BUFSZ 1024 struct tep_event_filter { - struct tep_handle *pevent; + struct tep_handle *tep; int filters; struct tep_filter_type *event_filters; char error_buffer[TEP_FILTER_ERROR_BUFSZ]; diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 1233582092977..6a4d5d1dd1cd5 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -154,7 +154,7 @@ add_filter_type(struct tep_event_filter *filter, int id) filter_type = &filter->event_filters[i]; filter_type->event_id = id; - filter_type->event = tep_find_event(filter->pevent, id); + filter_type->event = tep_find_event(filter->tep, id); filter_type->filter = NULL; filter->filters++; @@ -175,7 +175,7 @@ struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep) return NULL; memset(filter, 0, sizeof(*filter)); - filter->pevent = tep; + filter->tep = tep; tep_ref(tep); return filter; @@ -1257,7 +1257,7 @@ static void filter_init_error_buf(struct tep_event_filter *filter) enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, const char *filter_str) { - struct tep_handle *pevent = filter->pevent; + struct tep_handle *pevent = filter->tep; struct event_list *event; struct event_list *events = NULL; const char *filter_start; @@ -1377,7 +1377,7 @@ int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err, return 0; } - return tep_strerror(filter->pevent, err, buf, buflen); + return tep_strerror(filter->tep, err, buf, buflen); } /** @@ -1440,7 +1440,7 @@ void tep_filter_reset(struct tep_event_filter *filter) void tep_filter_free(struct tep_event_filter *filter) { - tep_unref(filter->pevent); + tep_unref(filter->tep); 
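With the member renamed, the ownership chain reads directly: tep_filter_alloc() takes a reference on the handle it stores in filter->tep, and tep_filter_free() drops it through tep_unref() as shown here. A minimal lifecycle sketch, under the assumption that tep_alloc() starts the reference count at one and that 0 from tep_filter_add_filter_str() means success; the filter string is just an example:

	struct tep_handle *tep = tep_alloc();			  /* ref = 1 */
	struct tep_event_filter *filter = tep_filter_alloc(tep); /* ref = 2 */

	if (tep_filter_add_filter_str(filter,
				      "sched/sched_switch:next_pid==0") == 0) {
		/* ... call tep_filter_match(filter, record) per record ... */
	}

	tep_filter_free(filter);	/* tep_unref(): ref back to 1 */
	tep_free(tep);			/* ref hits 0, handle released */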
tep_filter_reset(filter); @@ -1462,7 +1462,7 @@ static int copy_filter_type(struct tep_event_filter *filter, /* Can't assume that the pevent's are the same */ sys = filter_type->event->system; name = filter_type->event->name; - event = tep_find_event_by_name(filter->pevent, sys, name); + event = tep_find_event_by_name(filter->tep, sys, name); if (!event) return -1; @@ -1872,7 +1872,7 @@ int tep_event_filtered(struct tep_event_filter *filter, int event_id) enum tep_errno tep_filter_match(struct tep_event_filter *filter, struct tep_record *record) { - struct tep_handle *pevent = filter->pevent; + struct tep_handle *pevent = filter->tep; struct tep_filter_type *filter_type; int event_id; int ret; -- GitLab From c9bd7796959a8b92afe79c392dd54992bfc67328 Mon Sep 17 00:00:00 2001 From: Tzvetomir Stoyanov Date: Mon, 1 Apr 2019 12:43:20 -0400 Subject: [PATCH 0430/1861] tools lib traceevent: Rename input arguments and local variables of libtraceevent from pevent to tep "pevent" to "tep" renaming of: - all "pevent" input arguments of libtraceevent internal functions. - all local "pevent" variables of libtraceevent. This makes the implementation consistent with the chosen naming convention, tep (trace event parser), and will avoid any confusion with the old pevent name Signed-off-by: Tzvetomir Stoyanov Cc: Andrew Morton Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/linux-trace-devel/20190401132111.13727-5-tstoyanov@vmware.com Link: http://lkml.kernel.org/r/20190401164344.944953447@goodmis.org Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 306 ++++++++++++------------- tools/lib/traceevent/event-plugin.c | 24 +- tools/lib/traceevent/parse-filter.c | 24 +- tools/lib/traceevent/plugin_function.c | 6 +- 4 files changed, 180 insertions(+), 180 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 70ea638156f2e..7d4faaecd243e 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -148,14 +148,14 @@ struct cmdline_list { int pid; }; -static int cmdline_init(struct tep_handle *pevent) +static int cmdline_init(struct tep_handle *tep) { - struct cmdline_list *cmdlist = pevent->cmdlist; + struct cmdline_list *cmdlist = tep->cmdlist; struct cmdline_list *item; struct tep_cmdline *cmdlines; int i; - cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count); + cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count); if (!cmdlines) return -1; @@ -169,15 +169,15 @@ static int cmdline_init(struct tep_handle *pevent) free(item); } - qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - pevent->cmdlines = cmdlines; - pevent->cmdlist = NULL; + tep->cmdlines = cmdlines; + tep->cmdlist = NULL; return 0; } -static const char *find_cmdline(struct tep_handle *pevent, int pid) +static const char *find_cmdline(struct tep_handle *tep, int pid) { const struct tep_cmdline *comm; struct tep_cmdline key; @@ -185,13 +185,13 @@ static const char *find_cmdline(struct tep_handle *pevent, int pid) if (!pid) return ""; - if (!pevent->cmdlines && cmdline_init(pevent)) + if (!tep->cmdlines && cmdline_init(tep)) return ""; key.pid = pid; - comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, - sizeof(*pevent->cmdlines), cmdline_cmp); + comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, + sizeof(*tep->cmdlines), cmdline_cmp); if (comm) return comm->comm; @@ 
-232,10 +232,10 @@ bool tep_is_pid_registered(struct tep_handle *tep, int pid) * we must add this pid. This is much slower than when cmdlines * are added before the array is initialized. */ -static int add_new_comm(struct tep_handle *pevent, +static int add_new_comm(struct tep_handle *tep, const char *comm, int pid, bool override) { - struct tep_cmdline *cmdlines = pevent->cmdlines; + struct tep_cmdline *cmdlines = tep->cmdlines; struct tep_cmdline *cmdline; struct tep_cmdline key; char *new_comm; @@ -246,8 +246,8 @@ static int add_new_comm(struct tep_handle *pevent, /* avoid duplicates */ key.pid = pid; - cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, - sizeof(*pevent->cmdlines), cmdline_cmp); + cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count, + sizeof(*tep->cmdlines), cmdline_cmp); if (cmdline) { if (!override) { errno = EEXIST; @@ -264,26 +264,26 @@ static int add_new_comm(struct tep_handle *pevent, return 0; } - cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1)); + cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1)); if (!cmdlines) { errno = ENOMEM; return -1; } - cmdlines[pevent->cmdline_count].comm = strdup(comm); - if (!cmdlines[pevent->cmdline_count].comm) { + cmdlines[tep->cmdline_count].comm = strdup(comm); + if (!cmdlines[tep->cmdline_count].comm) { free(cmdlines); errno = ENOMEM; return -1; } - cmdlines[pevent->cmdline_count].pid = pid; + cmdlines[tep->cmdline_count].pid = pid; - if (cmdlines[pevent->cmdline_count].comm) - pevent->cmdline_count++; + if (cmdlines[tep->cmdline_count].comm) + tep->cmdline_count++; - qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - pevent->cmdlines = cmdlines; + qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + tep->cmdlines = cmdlines; return 0; } @@ -408,18 +408,18 @@ static int func_bcmp(const void *a, const void *b) return 1; } -static int func_map_init(struct tep_handle *pevent) +static int func_map_init(struct tep_handle *tep) { struct func_list *funclist; struct func_list *item; struct func_map *func_map; int i; - func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1)); + func_map = malloc(sizeof(*func_map) * (tep->func_count + 1)); if (!func_map) return -1; - funclist = pevent->funclist; + funclist = tep->funclist; i = 0; while (funclist) { @@ -432,34 +432,34 @@ static int func_map_init(struct tep_handle *pevent) free(item); } - qsort(func_map, pevent->func_count, sizeof(*func_map), func_cmp); + qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp); /* * Add a special record at the end. 
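For orientation while reading this conversion: registered functions accumulate on tep->funclist and are only turned into the sorted func_map on first lookup, so registration stays cheap and lookups become a bsearch(). A hedged usage sketch with invented addresses and names; tep_register_function() takes non-const strings, hence the casts:

	struct tep_handle *tep = tep_alloc();

	tep_register_function(tep, (char *)"schedule",
			      0xffffffff81000000ULL, (char *)"kernel");
	tep_register_function(tep, (char *)"do_idle",
			      0xffffffff81004000ULL, NULL);

	/* First lookup runs func_map_init(): the list is qsort()ed and
	 * capped with the sentinel record this hunk is adding. */
	const char *name = tep_find_function(tep, 0xffffffff81000010ULL);
	unsigned long long base =
		tep_find_function_address(tep, 0xffffffff81000010ULL);
	/* name is "schedule" and base is 0xffffffff81000000 when the
	 * address falls inside that function's range. */

	tep_free(tep);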
*/ - func_map[pevent->func_count].func = NULL; - func_map[pevent->func_count].addr = 0; - func_map[pevent->func_count].mod = NULL; + func_map[tep->func_count].func = NULL; + func_map[tep->func_count].addr = 0; + func_map[tep->func_count].mod = NULL; - pevent->func_map = func_map; - pevent->funclist = NULL; + tep->func_map = func_map; + tep->funclist = NULL; return 0; } static struct func_map * -__find_func(struct tep_handle *pevent, unsigned long long addr) +__find_func(struct tep_handle *tep, unsigned long long addr) { struct func_map *func; struct func_map key; - if (!pevent->func_map) - func_map_init(pevent); + if (!tep->func_map) + func_map_init(tep); key.addr = addr; - func = bsearch(&key, pevent->func_map, pevent->func_count, - sizeof(*pevent->func_map), func_bcmp); + func = bsearch(&key, tep->func_map, tep->func_count, + sizeof(*tep->func_map), func_bcmp); return func; } @@ -510,18 +510,18 @@ void tep_reset_function_resolver(struct tep_handle *tep) } static struct func_map * -find_func(struct tep_handle *pevent, unsigned long long addr) +find_func(struct tep_handle *tep, unsigned long long addr) { struct func_map *map; - if (!pevent->func_resolver) - return __find_func(pevent, addr); + if (!tep->func_resolver) + return __find_func(tep, addr); - map = &pevent->func_resolver->map; + map = &tep->func_resolver->map; map->mod = NULL; map->addr = addr; - map->func = pevent->func_resolver->func(pevent->func_resolver->priv, - &map->addr, &map->mod); + map->func = tep->func_resolver->func(tep->func_resolver->priv, + &map->addr, &map->mod); if (map->func == NULL) return NULL; @@ -662,18 +662,18 @@ static int printk_cmp(const void *a, const void *b) return 0; } -static int printk_map_init(struct tep_handle *pevent) +static int printk_map_init(struct tep_handle *tep) { struct printk_list *printklist; struct printk_list *item; struct printk_map *printk_map; int i; - printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1)); + printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1)); if (!printk_map) return -1; - printklist = pevent->printklist; + printklist = tep->printklist; i = 0; while (printklist) { @@ -685,27 +685,27 @@ static int printk_map_init(struct tep_handle *pevent) free(item); } - qsort(printk_map, pevent->printk_count, sizeof(*printk_map), printk_cmp); + qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp); - pevent->printk_map = printk_map; - pevent->printklist = NULL; + tep->printk_map = printk_map; + tep->printklist = NULL; return 0; } static struct printk_map * -find_printk(struct tep_handle *pevent, unsigned long long addr) +find_printk(struct tep_handle *tep, unsigned long long addr) { struct printk_map *printk; struct printk_map key; - if (!pevent->printk_map && printk_map_init(pevent)) + if (!tep->printk_map && printk_map_init(tep)) return NULL; key.addr = addr; - printk = bsearch(&key, pevent->printk_map, pevent->printk_count, - sizeof(*pevent->printk_map), printk_cmp); + printk = bsearch(&key, tep->printk_map, tep->printk_count, + sizeof(*tep->printk_map), printk_cmp); return printk; } @@ -782,29 +782,29 @@ static struct tep_event *alloc_event(void) return calloc(1, sizeof(struct tep_event)); } -static int add_event(struct tep_handle *pevent, struct tep_event *event) +static int add_event(struct tep_handle *tep, struct tep_event *event) { int i; - struct tep_event **events = realloc(pevent->events, sizeof(event) * - (pevent->nr_events + 1)); + struct tep_event **events = realloc(tep->events, sizeof(event) * + (tep->nr_events + 1)); 
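The function being converted here, add_event(), keeps tep->events ordered by id as it inserts, which is what makes later lookups a binary search. A sketch of the round trip, assuming the buffer holds the text of an event's format file and that 0 from tep_parse_event() means success; printf() needs <stdio.h>:

	static void load_sched_event(struct tep_handle *tep,
				     const char *buf, unsigned long size)
	{
		struct tep_event *ev;

		if (tep_parse_event(tep, buf, size, "sched") != 0)
			return;

		/* Inserted in id order by add_event(), so lookups can
		 * binary-search tep->events. */
		ev = tep_find_event_by_name(tep, "sched", "sched_switch");
		if (ev)
			printf("id %d of %d events\n", ev->id,
			       tep_get_events_count(tep));
	}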
if (!events) return -1; - pevent->events = events; + tep->events = events; - for (i = 0; i < pevent->nr_events; i++) { - if (pevent->events[i]->id > event->id) + for (i = 0; i < tep->nr_events; i++) { + if (tep->events[i]->id > event->id) break; } - if (i < pevent->nr_events) - memmove(&pevent->events[i + 1], - &pevent->events[i], - sizeof(event) * (pevent->nr_events - i)); + if (i < tep->nr_events) + memmove(&tep->events[i + 1], + &tep->events[i], + sizeof(event) * (tep->nr_events - i)); - pevent->events[i] = event; - pevent->nr_events++; + tep->events[i] = event; + tep->nr_events++; - event->tep = pevent; + event->tep = tep; return 0; } @@ -2941,14 +2941,14 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar } static struct tep_function_handler * -find_func_handler(struct tep_handle *pevent, char *func_name) +find_func_handler(struct tep_handle *tep, char *func_name) { struct tep_function_handler *func; - if (!pevent) + if (!tep) return NULL; - for (func = pevent->func_handlers; func; func = func->next) { + for (func = tep->func_handlers; func; func = func->next) { if (strcmp(func->name, func_name) == 0) break; } @@ -2956,12 +2956,12 @@ find_func_handler(struct tep_handle *pevent, char *func_name) return func; } -static void remove_func_handler(struct tep_handle *pevent, char *func_name) +static void remove_func_handler(struct tep_handle *tep, char *func_name) { struct tep_function_handler *func; struct tep_function_handler **next; - next = &pevent->func_handlers; + next = &tep->func_handlers; while ((func = *next)) { if (strcmp(func->name, func_name) == 0) { *next = func->next; @@ -3413,7 +3413,7 @@ int tep_read_number_field(struct tep_format_field *field, const void *data, } } -static int get_common_info(struct tep_handle *pevent, +static int get_common_info(struct tep_handle *tep, const char *type, int *offset, int *size) { struct tep_event *event; @@ -3423,12 +3423,12 @@ static int get_common_info(struct tep_handle *pevent, * All events should have the same common elements. 
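These cached offsets are the machinery behind the public accessors declared in event-parse.h: get_common_info() resolves a common field's offset and size once per handle, and __parse_common() then reads every record at that cached position. A short sketch using the public wrappers; assumes <stdio.h>:

	void show_record(struct tep_handle *tep, struct tep_record *record)
	{
		int id  = tep_data_type(tep, record);	/* common_type */
		int pid = tep_data_pid(tep, record);	/* common_pid */
		const char *comm = tep_data_comm_from_pid(tep, pid);

		printf("%s-%d raised event id %d\n",
		       comm ? comm : "<unknown>", pid, id);
	}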
* Pick any event to find where the type is; */ - if (!pevent->events) { + if (!tep->events) { do_warning("no event_list!"); return -1; } - event = pevent->events[0]; + event = tep->events[0]; field = tep_find_common_field(event, type); if (!field) return -1; @@ -3439,58 +3439,58 @@ static int get_common_info(struct tep_handle *pevent, return 0; } -static int __parse_common(struct tep_handle *pevent, void *data, +static int __parse_common(struct tep_handle *tep, void *data, int *size, int *offset, const char *name) { int ret; if (!*size) { - ret = get_common_info(pevent, name, offset, size); + ret = get_common_info(tep, name, offset, size); if (ret < 0) return ret; } - return tep_read_number(pevent, data + *offset, *size); + return tep_read_number(tep, data + *offset, *size); } -static int trace_parse_common_type(struct tep_handle *pevent, void *data) +static int trace_parse_common_type(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->type_size, &pevent->type_offset, + return __parse_common(tep, data, + &tep->type_size, &tep->type_offset, "common_type"); } -static int parse_common_pid(struct tep_handle *pevent, void *data) +static int parse_common_pid(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->pid_size, &pevent->pid_offset, + return __parse_common(tep, data, + &tep->pid_size, &tep->pid_offset, "common_pid"); } -static int parse_common_pc(struct tep_handle *pevent, void *data) +static int parse_common_pc(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->pc_size, &pevent->pc_offset, + return __parse_common(tep, data, + &tep->pc_size, &tep->pc_offset, "common_preempt_count"); } -static int parse_common_flags(struct tep_handle *pevent, void *data) +static int parse_common_flags(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->flags_size, &pevent->flags_offset, + return __parse_common(tep, data, + &tep->flags_size, &tep->flags_offset, "common_flags"); } -static int parse_common_lock_depth(struct tep_handle *pevent, void *data) +static int parse_common_lock_depth(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->ld_size, &pevent->ld_offset, + return __parse_common(tep, data, + &tep->ld_size, &tep->ld_offset, "common_lock_depth"); } -static int parse_common_migrate_disable(struct tep_handle *pevent, void *data) +static int parse_common_migrate_disable(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->ld_size, &pevent->ld_offset, + return __parse_common(tep, data, + &tep->ld_size, &tep->ld_offset, "common_migrate_disable"); } @@ -3566,7 +3566,7 @@ tep_find_event_by_name(struct tep_handle *tep, static unsigned long long eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { - struct tep_handle *pevent = event->tep; + struct tep_handle *tep = event->tep; unsigned long long val = 0; unsigned long long left, right; struct tep_print_arg *typearg = NULL; @@ -3588,7 +3588,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg } /* must be a number */ - val = tep_read_number(pevent, data + arg->field.field->offset, + val = tep_read_number(tep, data + arg->field.field->offset, arg->field.field->size); break; case TEP_PRINT_FLAGS: @@ -3628,11 +3628,11 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg } /* Default to long size */ - field_size = pevent->long_size; + field_size = 
tep->long_size; switch (larg->type) { case TEP_PRINT_DYNAMIC_ARRAY: - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + larg->dynarray.field->offset, larg->dynarray.field->size); if (larg->dynarray.field->elementsize) @@ -3661,7 +3661,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg default: goto default_op; /* oops, all bets off */ } - val = tep_read_number(pevent, + val = tep_read_number(tep, data + offset, field_size); if (typearg) val = eval_type(val, typearg, 1); @@ -3762,7 +3762,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg } break; case TEP_PRINT_DYNAMIC_ARRAY_LEN: - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + arg->dynarray.field->offset, arg->dynarray.field->size); /* @@ -3774,7 +3774,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg break; case TEP_PRINT_DYNAMIC_ARRAY: /* Without [], we pass the address to the dynamic data */ - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + arg->dynarray.field->offset, arg->dynarray.field->size); /* @@ -3849,7 +3849,7 @@ static void print_str_to_seq(struct trace_seq *s, const char *format, trace_seq_printf(s, format, str); } -static void print_bitmask_to_seq(struct tep_handle *pevent, +static void print_bitmask_to_seq(struct tep_handle *tep, struct trace_seq *s, const char *format, int len_arg, const void *data, int size) { @@ -3881,7 +3881,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent, * In the kernel, this is an array of long words, thus * endianness is very important. */ - if (pevent->file_bigendian) + if (tep->file_bigendian) index = size - (len + 1); else index = len; @@ -3907,7 +3907,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct tep_event *event, const char *format, int len_arg, struct tep_print_arg *arg) { - struct tep_handle *pevent = event->tep; + struct tep_handle *tep = event->tep; struct tep_print_flag_sym *flag; struct tep_format_field *field; struct printk_map *printk; @@ -3944,7 +3944,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, * is a pointer. */ if (!(field->flags & TEP_FIELD_IS_ARRAY) && - field->size == pevent->long_size) { + field->size == tep->long_size) { /* Handle heterogeneous recording and processing * architectures @@ -3959,12 +3959,12 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, * on 32-bit devices: * In this case, 64 bits must be read. */ - addr = (pevent->long_size == 8) ? + addr = (tep->long_size == 8) ? 
*(unsigned long long *)(data + field->offset) : (unsigned long long)*(unsigned int *)(data + field->offset); /* Check if it matches a print format */ - printk = find_printk(pevent, addr); + printk = find_printk(tep, addr); if (printk) trace_seq_puts(s, printk->printk); else @@ -4021,7 +4021,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_HEX_STR: if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) { unsigned long offset; - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + arg->hex.field->dynarray.field->offset, arg->hex.field->dynarray.field->size); hex = data + (offset & 0xffff); @@ -4052,7 +4052,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, unsigned long offset; struct tep_format_field *field = arg->int_array.field->dynarray.field; - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + field->offset, field->size); num = data + (offset & 0xffff); @@ -4103,7 +4103,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->string.string); arg->string.offset = f->offset; } - str_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->string.offset)); + str_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->string.offset)); str_offset &= 0xffff; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; @@ -4121,10 +4121,10 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->bitmask.bitmask); arg->bitmask.offset = f->offset; } - bitmask_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->bitmask.offset)); + bitmask_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; - print_bitmask_to_seq(pevent, s, format, len_arg, + print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; } @@ -4256,7 +4256,7 @@ static void free_args(struct tep_print_arg *args) static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) { - struct tep_handle *pevent = event->tep; + struct tep_handle *tep = event->tep; struct tep_format_field *field, *ip_field; struct tep_print_arg *args, *arg, **next; unsigned long long ip, val; @@ -4264,8 +4264,8 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s void *bptr; int vsize = 0; - field = pevent->bprint_buf_field; - ip_field = pevent->bprint_ip_field; + field = tep->bprint_buf_field; + ip_field = tep->bprint_ip_field; if (!field) { field = tep_find_field(event, "buf"); @@ -4278,11 +4278,11 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s do_warning_event(event, "can't find ip field for binary printk"); return NULL; } - pevent->bprint_buf_field = field; - pevent->bprint_ip_field = ip_field; + tep->bprint_buf_field = field; + tep->bprint_ip_field = ip_field; } - ip = tep_read_number(pevent, data + ip_field->offset, ip_field->size); + ip = tep_read_number(tep, data + ip_field->offset, ip_field->size); /* * The first arg is the IP pointer. 
@@ -4360,7 +4360,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s vsize = 4; break; case 1: - vsize = pevent->long_size; + vsize = tep->long_size; break; case 2: vsize = 8; @@ -4377,7 +4377,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s /* the pointers are always 4 bytes aligned */ bptr = (void *)(((unsigned long)bptr + 3) & ~3); - val = tep_read_number(pevent, bptr, vsize); + val = tep_read_number(tep, bptr, vsize); bptr += vsize; arg = alloc_arg(); if (!arg) { @@ -4434,13 +4434,13 @@ static char * get_bprint_format(void *data, int size __maybe_unused, struct tep_event *event) { - struct tep_handle *pevent = event->tep; + struct tep_handle *tep = event->tep; unsigned long long addr; struct tep_format_field *field; struct printk_map *printk; char *format; - field = pevent->bprint_fmt_field; + field = tep->bprint_fmt_field; if (!field) { field = tep_find_field(event, "fmt"); @@ -4448,12 +4448,12 @@ get_bprint_format(void *data, int size __maybe_unused, do_warning_event(event, "can't find format field for binary printk"); return NULL; } - pevent->bprint_fmt_field = field; + tep->bprint_fmt_field = field; } - addr = tep_read_number(pevent, data + field->offset, field->size); + addr = tep_read_number(tep, data + field->offset, field->size); - printk = find_printk(pevent, addr); + printk = find_printk(tep, addr); if (!printk) { if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0) return NULL; @@ -4835,13 +4835,13 @@ void tep_print_field(struct trace_seq *s, void *data, { unsigned long long val; unsigned int offset, len, i; - struct tep_handle *pevent = field->event->tep; + struct tep_handle *tep = field->event->tep; if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; len = field->size; if (field->flags & TEP_FIELD_IS_DYNAMIC) { - val = tep_read_number(pevent, data + offset, len); + val = tep_read_number(tep, data + offset, len); offset = val; len = offset >> 16; offset &= 0xffff; @@ -4861,7 +4861,7 @@ void tep_print_field(struct trace_seq *s, void *data, field->flags &= ~TEP_FIELD_IS_STRING; } } else { - val = tep_read_number(pevent, data + field->offset, + val = tep_read_number(tep, data + field->offset, field->size); if (field->flags & TEP_FIELD_IS_POINTER) { trace_seq_printf(s, "0x%llx", val); @@ -4910,7 +4910,7 @@ void tep_print_fields(struct trace_seq *s, void *data, static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) { - struct tep_handle *pevent = event->tep; + struct tep_handle *tep = event->tep; struct tep_print_fmt *print_fmt = &event->print_fmt; struct tep_print_arg *arg = print_fmt->args; struct tep_print_arg *args = NULL; @@ -5002,7 +5002,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e case '-': goto cont_process; case 'p': - if (pevent->long_size == 4) + if (tep->long_size == 4) ls = 1; else ls = 2; @@ -5063,7 +5063,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e arg = arg->next; if (show_func) { - func = find_func(pevent, val); + func = find_func(tep, val); if (func) { trace_seq_puts(s, func->func); if (show_func == 'F') @@ -5073,7 +5073,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e break; } } - if (pevent->long_size == 8 && ls == 1 && + if (tep->long_size == 8 && ls == 1 && sizeof(long) != 8) { char *p; @@ -5320,14 +5320,14 @@ const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid) } static struct tep_cmdline 
* -pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct tep_cmdline *next) +pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next) { struct cmdline_list *cmdlist = (struct cmdline_list *)next; if (cmdlist) cmdlist = cmdlist->next; else - cmdlist = pevent->cmdlist; + cmdlist = tep->cmdlist; while (cmdlist && strcmp(cmdlist->comm, comm) != 0) cmdlist = cmdlist->next; @@ -6078,11 +6078,11 @@ static void free_handler(struct event_handler *handle) free(handle); } -static int find_event_handle(struct tep_handle *pevent, struct tep_event *event) +static int find_event_handle(struct tep_handle *tep, struct tep_event *event) { struct event_handler *handle, **next; - for (next = &pevent->handlers; *next; + for (next = &tep->handlers; *next; next = &(*next)->next) { handle = *next; if (event_matches(event, handle->id, @@ -6120,7 +6120,7 @@ static int find_event_handle(struct tep_handle *pevent, struct tep_event *event) * /sys/kernel/debug/tracing/events/.../.../format */ enum tep_errno __tep_parse_format(struct tep_event **eventp, - struct tep_handle *pevent, const char *buf, + struct tep_handle *tep, const char *buf, unsigned long size, const char *sys) { struct tep_event *event; @@ -6162,8 +6162,8 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, goto event_alloc_failed; } - /* Add pevent to event so that it can be referenced */ - event->tep = pevent; + /* Add tep to event so that it can be referenced */ + event->tep = tep; ret = event_read_format(event); if (ret < 0) { @@ -6175,7 +6175,7 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, * If the event has an override, don't print warnings if the event * print format fails to parse. */ - if (pevent && find_event_handle(pevent, event)) + if (tep && find_event_handle(tep, event)) show_warning = 0; ret = event_read_print(event); @@ -6227,18 +6227,18 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, } static enum tep_errno -__parse_event(struct tep_handle *pevent, +__parse_event(struct tep_handle *tep, struct tep_event **eventp, const char *buf, unsigned long size, const char *sys) { - int ret = __tep_parse_format(eventp, pevent, buf, size, sys); + int ret = __tep_parse_format(eventp, tep, buf, size, sys); struct tep_event *event = *eventp; if (event == NULL) return ret; - if (pevent && add_event(pevent, event)) { + if (tep && add_event(tep, event)) { ret = TEP_ERRNO__MEM_ALLOC_FAILED; goto event_add_failed; } @@ -6492,7 +6492,7 @@ int tep_print_func_field(struct trace_seq *s, const char *fmt, struct tep_record *record, int err) { struct tep_format_field *field = tep_find_field(event, name); - struct tep_handle *pevent = event->tep; + struct tep_handle *tep = event->tep; unsigned long long val; struct func_map *func; char tmp[128]; @@ -6503,7 +6503,7 @@ int tep_print_func_field(struct trace_seq *s, const char *fmt, if (tep_read_number_field(field, record->data, &val)) goto failed; - func = find_func(pevent, val); + func = find_func(tep, val); if (func) snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); @@ -6648,7 +6648,7 @@ int tep_unregister_print_function(struct tep_handle *tep, return -1; } -static struct tep_event *search_event(struct tep_handle *pevent, int id, +static struct tep_event *search_event(struct tep_handle *tep, int id, const char *sys_name, const char *event_name) { @@ -6656,7 +6656,7 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, if (id >= 0) { /* search by id */ - event = tep_find_event(pevent, id); + event 
= tep_find_event(tep, id); if (!event) return NULL; if (event_name && (strcmp(event_name, event->name) != 0)) @@ -6664,7 +6664,7 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, if (sys_name && (strcmp(sys_name, event->system) != 0)) return NULL; } else { - event = tep_find_event_by_name(pevent, sys_name, event_name); + event = tep_find_event_by_name(tep, sys_name, event_name); if (!event) return NULL; } @@ -6821,14 +6821,14 @@ not_found: */ struct tep_handle *tep_alloc(void) { - struct tep_handle *pevent = calloc(1, sizeof(*pevent)); + struct tep_handle *tep = calloc(1, sizeof(*tep)); - if (pevent) { - pevent->ref_count = 1; - pevent->host_bigendian = tep_is_bigendian(); + if (tep) { + tep->ref_count = 1; + tep->host_bigendian = tep_is_bigendian(); } - return pevent; + return tep; } void tep_ref(struct tep_handle *tep) diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index d0488de0174ab..8ca28de9337a5 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -269,7 +269,7 @@ void tep_print_plugins(struct trace_seq *s, } static void -load_plugin(struct tep_handle *pevent, const char *path, +load_plugin(struct tep_handle *tep, const char *path, const char *file, void *data) { struct tep_plugin_list **plugin_list = data; @@ -316,7 +316,7 @@ load_plugin(struct tep_handle *pevent, const char *path, *plugin_list = list; pr_stat("registering plugin: %s", plugin); - func(pevent); + func(tep); return; out_free: @@ -324,9 +324,9 @@ load_plugin(struct tep_handle *pevent, const char *path, } static void -load_plugins_dir(struct tep_handle *pevent, const char *suffix, +load_plugins_dir(struct tep_handle *tep, const char *suffix, const char *path, - void (*load_plugin)(struct tep_handle *pevent, + void (*load_plugin)(struct tep_handle *tep, const char *path, const char *name, void *data), @@ -359,15 +359,15 @@ load_plugins_dir(struct tep_handle *pevent, const char *suffix, if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) continue; - load_plugin(pevent, path, name, data); + load_plugin(tep, path, name, data); } closedir(dir); } static void -load_plugins(struct tep_handle *pevent, const char *suffix, - void (*load_plugin)(struct tep_handle *pevent, +load_plugins(struct tep_handle *tep, const char *suffix, + void (*load_plugin)(struct tep_handle *tep, const char *path, const char *name, void *data), @@ -378,7 +378,7 @@ load_plugins(struct tep_handle *pevent, const char *suffix, char *envdir; int ret; - if (pevent->flags & TEP_DISABLE_PLUGINS) + if (tep->flags & TEP_DISABLE_PLUGINS) return; /* @@ -386,8 +386,8 @@ load_plugins(struct tep_handle *pevent, const char *suffix, * check that first. 
*/ #ifdef PLUGIN_DIR - if (!(pevent->flags & TEP_DISABLE_SYS_PLUGINS)) - load_plugins_dir(pevent, suffix, PLUGIN_DIR, + if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS)) + load_plugins_dir(tep, suffix, PLUGIN_DIR, load_plugin, data); #endif @@ -397,7 +397,7 @@ load_plugins(struct tep_handle *pevent, const char *suffix, */ envdir = getenv("TRACEEVENT_PLUGIN_DIR"); if (envdir) - load_plugins_dir(pevent, suffix, envdir, load_plugin, data); + load_plugins_dir(tep, suffix, envdir, load_plugin, data); /* * Now let the home directory override the environment @@ -413,7 +413,7 @@ load_plugins(struct tep_handle *pevent, const char *suffix, return; } - load_plugins_dir(pevent, suffix, path, load_plugin, data); + load_plugins_dir(tep, suffix, path, load_plugin, data); free(path); } diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 6a4d5d1dd1cd5..552592d153fb8 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -256,7 +256,7 @@ static int event_match(struct tep_event *event, } static enum tep_errno -find_event(struct tep_handle *pevent, struct event_list **events, +find_event(struct tep_handle *tep, struct event_list **events, char *sys_name, char *event_name) { struct tep_event *event; @@ -299,8 +299,8 @@ find_event(struct tep_handle *pevent, struct event_list **events, } } - for (i = 0; i < pevent->nr_events; i++) { - event = pevent->events[i]; + for (i = 0; i < tep->nr_events; i++) { + event = tep->events[i]; if (event_match(event, sys_name ? &sreg : NULL, &ereg)) { match = 1; if (add_event(events, event) < 0) { @@ -1257,7 +1257,7 @@ static void filter_init_error_buf(struct tep_event_filter *filter) enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, const char *filter_str) { - struct tep_handle *pevent = filter->tep; + struct tep_handle *tep = filter->tep; struct event_list *event; struct event_list *events = NULL; const char *filter_start; @@ -1313,7 +1313,7 @@ enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, } /* Find this event */ - ret = find_event(pevent, &events, strim(sys_name), strim(event_name)); + ret = find_event(tep, &events, strim(sys_name), strim(event_name)); if (ret < 0) { free_events(events); free(this_event); @@ -1334,7 +1334,7 @@ enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, if (ret < 0) rtn = ret; - if (ret >= 0 && pevent->test_filters) { + if (ret >= 0 && tep->test_filters) { char *test; test = tep_filter_make_string(filter, event->event->id); if (test) { @@ -1459,7 +1459,7 @@ static int copy_filter_type(struct tep_event_filter *filter, const char *name; char *str; - /* Can't assume that the pevent's are the same */ + /* Can't assume that the tep's are the same */ sys = filter_type->event->system; name = filter_type->event->name; event = tep_find_event_by_name(filter->tep, sys, name); @@ -1697,7 +1697,7 @@ static int test_num(struct tep_event *event, struct tep_filter_arg *arg, static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record) { struct tep_event *event; - struct tep_handle *pevent; + struct tep_handle *tep; unsigned long long addr; const char *val = NULL; unsigned int size; @@ -1727,12 +1727,12 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * } else { event = arg->str.field->event; - pevent = event->tep; + tep = event->tep; addr = get_value(event, arg->str.field, record); if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG)) /* convert 
to a kernel symbol */
-			val = tep_find_function(pevent, addr);
+			val = tep_find_function(tep, addr);
 
 		if (val == NULL) {
 			/* just use the hex of the string name */
@@ -1872,7 +1872,7 @@ int tep_event_filtered(struct tep_event_filter *filter, int event_id)
 enum tep_errno tep_filter_match(struct tep_event_filter *filter,
 				struct tep_record *record)
 {
-	struct tep_handle *pevent = filter->tep;
+	struct tep_handle *tep = filter->tep;
 	struct tep_filter_type *filter_type;
 	int event_id;
 	int ret;
@@ -1883,7 +1883,7 @@ enum tep_errno tep_filter_match(struct tep_event_filter *filter,
 	if (!filter->filters)
 		return TEP_ERRNO__NO_FILTER;
 
-	event_id = tep_data_type(pevent, record);
+	event_id = tep_data_type(tep, record);
 
 	filter_type = find_filter_type(filter, event_id);
 	if (!filter_type)
diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c
index d6a8df0606141..7770fcb78e0fb 100644
--- a/tools/lib/traceevent/plugin_function.c
+++ b/tools/lib/traceevent/plugin_function.c
@@ -126,7 +126,7 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
 static int function_handler(struct trace_seq *s, struct tep_record *record,
 			    struct tep_event *event, void *context)
 {
-	struct tep_handle *pevent = event->tep;
+	struct tep_handle *tep = event->tep;
 	unsigned long long function;
 	unsigned long long pfunction;
 	const char *func;
@@ -136,12 +136,12 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
 	if (tep_get_field_val(s, event, "ip", record, &function, 1))
 		return trace_seq_putc(s, '!');
 
-	func = tep_find_function(pevent, function);
+	func = tep_find_function(tep, function);
 
 	if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
 		return trace_seq_putc(s, '!');
 
-	parent = tep_find_function(pevent, pfunction);
+	parent = tep_find_function(tep, pfunction);
 
 	if (parent && ftrace_indent->set)
 		index = add_and_get_index(parent, func, record->cpu);
-- 
GitLab

From 3b1c5d9659718263c7f9c93af82f98221c58f171 Mon Sep 17 00:00:00 2001
From: Alexey Budankov
Date: Mon, 18 Mar 2019 20:39:49 +0300
Subject: [PATCH 0431/1861] tools build: Implement libzstd feature check, LIBZSTD_DIR and NO_LIBZSTD defines

Implement the libzstd feature check and the NO_LIBZSTD and LIBZSTD_DIR
defines, to override the Zstd library sources or to disable the feature
from the command line:

  $ make -C tools/perf LIBZSTD_DIR=/path/to/zstd/sources/ clean all
  $ make -C tools/perf NO_LIBZSTD=1 clean all

The auto-detected feature status is reported just before compilation
starts. If your system has some version of the zstd library
preinstalled, the build system finds and uses it during the build. If
you prefer to compile against some other version of the zstd library,
you can point the build at it using the LIBZSTD_DIR define.
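For context only, here is a minimal sketch of the streaming API that the
new feature test probes with ZSTD_createCStream(). It is not part of this
patch; the buffer contents, buffer sizes and the compression level are
arbitrary illustrations:

  #include <stdio.h>
  #include <zstd.h>

  int main(void)
  {
  	static const char input[] = "perf trace data to compress";
  	char output[128];
  	ZSTD_CStream *cstream = ZSTD_createCStream();
  	ZSTD_inBuffer in = { input, sizeof(input), 0 };
  	ZSTD_outBuffer out = { output, sizeof(output), 0 };

  	if (!cstream)
  		return 1;
  	ZSTD_initCStream(cstream, 1);            /* arbitrary level 1 */
  	ZSTD_compressStream(cstream, &out, &in); /* consume the input */
  	ZSTD_endStream(cstream, &out);           /* flush the zstd frame */
  	printf("%zu bytes in, %zu bytes out\n", in.pos, out.pos);
  	ZSTD_freeCStream(cstream);
  	return 0;
  }

Building such a program requires linking with -lzstd, which is exactly
what the feature test below checks for.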
Signed-off-by: Alexey Budankov
Reviewed-by: Jiri Olsa
Cc: Alexander Shishkin
Cc: Andi Kleen
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/9b4cd8b0-10a3-1f1e-8d6b-5922a7ca216b@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/build/Makefile.feature       |  2 ++
 tools/build/feature/Makefile       |  6 +++++-
 tools/build/feature/test-all.c     |  5 +++++
 tools/build/feature/test-libzstd.c | 12 ++++++++++++
 tools/perf/Makefile.config         | 20 ++++++++++++++++++++
 tools/perf/Makefile.perf           |  3 +++
 tools/perf/builtin-version.c       |  2 ++
 7 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 tools/build/feature/test-libzstd.c

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 8d3864b061f38..361207387b1b7 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -67,6 +67,7 @@ FEATURE_TESTS_BASIC :=          \
         sdt                     \
         setns                   \
         libaio                  \
+        libzstd                 \
         disassembler-four-args

 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
@@ -120,6 +121,7 @@ FEATURE_DISPLAY ?=              \
          get_cpuid              \
          bpf                    \
          libaio                 \
+         libzstd                \
          disassembler-four-args

 # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ceb4441b6277..4b8244ee65ce6 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -62,7 +62,8 @@ FILES=                                          \
          test-clang.bin                         \
          test-llvm.bin                          \
          test-llvm-version.bin                  \
-         test-libaio.bin
+         test-libaio.bin                        \
+         test-libzstd.bin

 FILES := $(addprefix $(OUTPUT),$(FILES))

@@ -301,6 +302,9 @@ $(OUTPUT)test-clang.bin:
 $(OUTPUT)test-libaio.bin:
 	$(BUILD) -lrt

+$(OUTPUT)test-libzstd.bin:
+	$(BUILD) -lzstd
+
 ###############################

 clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 7853e6d91090c..a59c537050934 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -182,6 +182,10 @@
 # include "test-disassembler-four-args.c"
 #undef main

+#define main main_test_zstd
+# include "test-libzstd.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
 	main_test_libpython();
@@ -224,6 +228,7 @@ int main(int argc, char *argv[])
 	main_test_libaio();
 	main_test_reallocarray();
 	main_test_disassembler_four_args();
+	main_test_libzstd();

 	return 0;
 }
diff --git a/tools/build/feature/test-libzstd.c b/tools/build/feature/test-libzstd.c
new file mode 100644
index 0000000000000..55268c01b84db
--- /dev/null
+++ b/tools/build/feature/test-libzstd.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <zstd.h>
+
+int main(void)
+{
+	ZSTD_CStream	*cstream;
+
+	cstream = ZSTD_createCStream();
+	ZSTD_freeCStream(cstream);
+
+	return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index fe3f97e342fae..beb8b48b44e61 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -152,6 +152,13 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf

+ifdef LIBZSTD_DIR
+  LIBZSTD_CFLAGS  := -I$(LIBZSTD_DIR)/lib
+  LIBZSTD_LDFLAGS := -L$(LIBZSTD_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS)
+
 FEATURE_CHECK_CFLAGS-bpf = -I.
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi

 # include ARCH specific config
 -include $(src-perf)/arch/$(SRCARCH)/Makefile
@@ -787,6 +794,19 @@ ifndef NO_LZMA
   endif
 endif

+ifndef NO_LIBZSTD
+  ifeq ($(feature-libzstd), 1)
+    CFLAGS += -DHAVE_ZSTD_SUPPORT
+    CFLAGS += $(LIBZSTD_CFLAGS)
+    LDFLAGS += $(LIBZSTD_LDFLAGS)
+    EXTLIBS += -lzstd
+    $(call detected,CONFIG_ZSTD)
+  else
+    msg := $(warning No libzstd found, disables trace compression, please install libzstd-dev[el] and/or set LIBZSTD_DIR);
+    NO_LIBZSTD := 1
+  endif
+endif
+
 ifndef NO_BACKTRACE
   ifeq ($(feature-backtrace), 1)
     CFLAGS += -DHAVE_BACKTRACE_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e8c9f77e90107..c706548d5b105 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -108,6 +108,9 @@ include ../scripts/utilities.mak
 #	streaming for record mode. Currently Posix AIO trace streaming is
 #	supported only when linking with glibc.
 #
+# Define NO_LIBZSTD if you do not want support of Zstandard based runtime
+# trace compression in record mode.
+#
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 50df168be326d..f470144d1a704 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -78,6 +78,8 @@ static void library_status(void)
 	STATUS(HAVE_LZMA_SUPPORT, lzma);
 	STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
 	STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+	STATUS(HAVE_AIO_SUPPORT, aio);
+	STATUS(HAVE_ZSTD_SUPPORT, zstd);
 }

 int cmd_version(int argc, const char **argv)
-- 
GitLab

From 470530bbb8fbbf2a09bd1d7150bb92501c5c54e6 Mon Sep 17 00:00:00 2001
From: Alexey Budankov
Date: Mon, 18 Mar 2019 20:40:26 +0300
Subject: [PATCH 0432/1861] perf record: Implement --mmap-flush= option

Implement a --mmap-flush option that specifies the minimal number of
bytes that is extracted from the mmaped kernel buffer to be stored into
a trace. The default option value is 1 byte, which means that every time
the trace writing thread finds some new data in the mmaped buffer, the
data is extracted, possibly compressed and written to the trace.

  $ tools/perf/perf record --mmap-flush 1024 -e cycles -- matrix.gcc
  $ tools/perf/perf record --aio --mmap-flush 1K -e cycles -- matrix.gcc

The option is independent of the -z setting, does not vary with the
compression level, and can serve two purposes.

The first purpose is to increase the compression ratio of the trace
data. Larger data chunks are compressed more effectively, so the option
allows specifying the size of the data chunks to compress. Also, in some
cases executing more write syscalls with a smaller data size can take
longer than executing fewer write syscalls with a bigger data size due
to syscall overhead, so extracting bigger data chunks, as specified by
the option value, can additionally decrease runtime overhead.

The second purpose is to avoid a self-monitoring live-lock issue in
system-wide (-a) profiling mode. Profiling in system-wide mode with
compression (-a -z) can additionally induce data into the kernel buffers
along with the data from the monitored processes. If the rate and volume
of performance data from the monitored processes are high, then the
trace streaming and compression activity in the tool is also high. High
tool process activity can lead to a subtle live-lock effect, in which
the compression of a single new byte from some mmaped kernel buffer
leads to the generation of the next single byte in some mmaped buffer.
So the perf tool process ends up in endless self-monitoring.

The implemented synch parameter is the means to force data to move
independently of the specified flush threshold value. Whatever the flush
value, the tool needs the capability to unconditionally drain the memory
buffers, at least at the end of the collection.

Committer testing:

Running with the default value, i.e. as soon as there is something to
read go on consuming, we first write the synthesized events, small
chunks of about 128 bytes:

  # perf trace -m 2048 --call-graph dwarf -e write -- perf record
  101.142 ( 0.004 ms): perf/25821 write(fd: 3, buf: 0x210db60, count: 120) = 120
                                       __libc_write (/usr/lib64/libpthread-2.28.so)
                                       ion (/home/acme/bin/perf)
                                       record__write (inlined)
                                       process_synthesized_event (/home/acme/bin/perf)
                                       perf_tool__process_synth_event (inlined)
                                       perf_event__synthesize_mmap_events (/home/acme/bin/perf)

Then we move to reading the mmap buffers, consuming the events put there
by the kernel perf infrastructure:

  107.561 ( 0.005 ms): perf/25821 write(fd: 3, buf: 0x7f1befc02000, count: 336) = 336
                                       __libc_write (/usr/lib64/libpthread-2.28.so)
                                       ion (/home/acme/bin/perf)
                                       record__write (inlined)
                                       record__pushfn (/home/acme/bin/perf)
                                       perf_mmap__push (/home/acme/bin/perf)
                                       record__mmap_read_evlist (inlined)
                                       record__mmap_read_all (inlined)
                                       __cmd_record (inlined)
                                       cmd_record (/home/acme/bin/perf)
  12919.953 ( 0.136 ms): perf/25821 write(fd: 3, buf: 0x7f1befc83150, count: 184984) = 184984
  12920.094 ( 0.155 ms): perf/25821 write(fd: 3, buf: 0x7f1befc02150, count: 261816) = 261816
  12920.253 ( 0.093 ms): perf/25821 write(fd: 3, buf: 0x7f1befb81120, count: 170832) = 170832

If we limit it to write only when more than 16MB are available for
reading, it throttles that to a quarter of the --mmap-pages set for
'perf record', which by default gets to 528384 bytes, found out using
'record -v':

  mmap flush: 132096
  mmap size 528384B

With that in place, all the writes coming from record__mmap_read_evlist(),
i.e. from the mmap buffers set up by the kernel perf infrastructure, were
at least 132096 bytes long.
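To make the quarter-of-the-mmap-size cap shown above concrete, here is a
distilled sketch of how the requested flush value is clamped. It is
illustrative only; the real logic lives in record__mmap_flush_parse()
further below and goes through parse_tag_value() and
perf_evlist__mmap_size():

  /* Mirrors the clamping applied to --mmap-flush; not the actual code. */
  static int mmap_flush_clamp(int requested, int mmap_size)
  {
  	int flush_max = mmap_size / 4;	/* cap: a quarter of the mmap size */

  	if (requested < 1)
  		requested = 1;		/* MMAP_FLUSH_DEFAULT */
  	if (requested > flush_max)
  		requested = flush_max;
  	return requested;
  }

For the default 528384 byte maps this yields 528384 / 4 == 132096,
matching the 'mmap flush: 132096' line above.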
Trying with a bigger mmap size: perf trace -e write perf record -v -m 2048 --mmap-flush 16M 74982.928 ( 2.471 ms): perf/26500 write(fd: 3, buf: 0x7ff94a6cc000, count: 3580888) = 3580888 74985.406 ( 2.353 ms): perf/26500 write(fd: 3, buf: 0x7ff949ecb000, count: 3453256) = 3453256 74987.764 ( 2.629 ms): perf/26500 write(fd: 3, buf: 0x7ff9496ca000, count: 3859232) = 3859232 74990.399 ( 2.341 ms): perf/26500 write(fd: 3, buf: 0x7ff948ec9000, count: 3769032) = 3769032 74992.744 ( 2.064 ms): perf/26500 write(fd: 3, buf: 0x7ff9486c8000, count: 3310520) = 3310520 74994.814 ( 2.619 ms): perf/26500 write(fd: 3, buf: 0x7ff947ec7000, count: 4194688) = 4194688 74997.439 ( 2.787 ms): perf/26500 write(fd: 3, buf: 0x7ff9476c6000, count: 4029760) = 4029760 Was again limited to a quarter of the mmap size: mmap flush: 2098176 mmap size 8392704B A warning about that would be good to have but can be added later, something like: "max flush is a quarter of the mmap size, if wanting to bump the mmap flush further, bump the mmap size as well using -m/--mmap-pages" Also rename the 'sync' parameters to 'synch' to keep tools/perf building with older glibcs: cc1: warnings being treated as errors builtin-record.c: In function 'record__mmap_read_evlist': builtin-record.c:775: warning: declaration of 'sync' shadows a global declaration /usr/include/unistd.h:933: warning: shadowed declaration is here builtin-record.c: In function 'record__mmap_read_all': builtin-record.c:856: warning: declaration of 'sync' shadows a global declaration /usr/include/unistd.h:933: warning: shadowed declaration is here Signed-off-by: Alexey Budankov Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/f6600d72-ecfa-2eb7-7e51-f6954547d500@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 19 +++++++ tools/perf/builtin-record.c | 65 +++++++++++++++++++++--- tools/perf/perf.h | 1 + tools/perf/util/evlist.c | 6 +-- tools/perf/util/evlist.h | 3 +- tools/perf/util/mmap.c | 4 +- tools/perf/util/mmap.h | 3 +- 7 files changed, 89 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 8fe4dffcadd0e..58986f4cc190f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -459,6 +459,25 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer cpu - thread affinity mask is set to cpu of the processed mmap buffer +--mmap-flush=number:: + +Specify minimal number of bytes that is extracted from mmap data pages and +processed for output. One can specify the number using B/K/M/G suffixes. + +The maximal allowed value is a quarter of the size of mmaped data pages. + +The default option value is 1 byte which means that every time that the output +writing thread finds some new data in the mmaped buffer the data is extracted, +possibly compressed (-z) and written to the output, perf.data or pipe. + +Larger data chunks are compressed more effectively in comparison to smaller +chunks so extraction of larger chunks from the mmap data pages is preferable +from the perspective of output size reduction. 
+ +Also at some cases executing less output write syscalls with bigger data size +can take less time than executing more output write syscalls with smaller data +size thus lowering runtime profiling overhead. + --all-kernel:: Configure all used events to run in kernel space. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4e2d953d4bc58..c5e10552776a9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -337,6 +337,41 @@ static int record__aio_enabled(struct record *rec) return rec->opts.nr_cblocks > 0; } +#define MMAP_FLUSH_DEFAULT 1 +static int record__mmap_flush_parse(const struct option *opt, + const char *str, + int unset) +{ + int flush_max; + struct record_opts *opts = (struct record_opts *)opt->value; + static struct parse_tag tags[] = { + { .tag = 'B', .mult = 1 }, + { .tag = 'K', .mult = 1 << 10 }, + { .tag = 'M', .mult = 1 << 20 }, + { .tag = 'G', .mult = 1 << 30 }, + { .tag = 0 }, + }; + + if (unset) + return 0; + + if (str) { + opts->mmap_flush = parse_tag_value(str, tags); + if (opts->mmap_flush == (int)-1) + opts->mmap_flush = strtol(str, NULL, 0); + } + + if (!opts->mmap_flush) + opts->mmap_flush = MMAP_FLUSH_DEFAULT; + + flush_max = perf_evlist__mmap_size(opts->mmap_pages); + flush_max /= 4; + if (opts->mmap_flush > flush_max) + opts->mmap_flush = flush_max; + + return 0; +} + static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -546,7 +581,8 @@ static int record__mmap_evlist(struct record *rec, if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode, - opts->nr_cblocks, opts->affinity) < 0) { + opts->nr_cblocks, opts->affinity, + opts->mmap_flush) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -736,7 +772,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) } static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, - bool overwrite) + bool overwrite, bool synch) { u64 bytes_written = rec->bytes_written; int i; @@ -759,12 +795,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli off = record__aio_get_pos(trace_fd); for (i = 0; i < evlist->nr_mmaps; i++) { + u64 flush = 0; struct perf_mmap *map = &maps[i]; if (map->base) { record__adjust_affinity(rec, map); + if (synch) { + flush = map->flush; + map->flush = 1; + } if (!record__aio_enabled(rec)) { if (perf_mmap__push(map, rec, record__pushfn) != 0) { + if (synch) + map->flush = flush; rc = -1; goto out; } @@ -777,10 +820,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli idx = record__aio_sync(map, false); if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) { record__aio_set_pos(trace_fd, off); + if (synch) + map->flush = flush; rc = -1; goto out; } } + if (synch) + map->flush = flush; } if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && @@ -806,15 +853,15 @@ out: return rc; } -static int record__mmap_read_all(struct record *rec) +static int record__mmap_read_all(struct record *rec, bool synch) { int err; - err = record__mmap_read_evlist(rec, rec->evlist, false); + err = record__mmap_read_evlist(rec, rec->evlist, false, synch); if (err) return err; - return record__mmap_read_evlist(rec, rec->evlist, true); + return record__mmap_read_evlist(rec, rec->evlist, true, synch); } static void record__init_features(struct record *rec) @@ 
-1340,7 +1387,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (trigger_is_hit(&switch_output_trigger) || done || draining) perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); - if (record__mmap_read_all(rec) < 0) { + if (record__mmap_read_all(rec, false) < 0) { trigger_error(&auxtrace_snapshot_trigger); trigger_error(&switch_output_trigger); err = -1; @@ -1441,6 +1488,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__synthesize_workload(rec, true); out_child: + record__mmap_read_all(rec, true); record__aio_mmap_read_sync(rec); if (forks) { @@ -1846,6 +1894,7 @@ static struct record record = { .uses_mmap = true, .default_per_cpu = true, }, + .mmap_flush = MMAP_FLUSH_DEFAULT, }, .tool = { .sample = process_sample_event, @@ -1912,6 +1961,9 @@ static struct option __record_options[] = { OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", "number of mmap data pages and AUX area tracing mmap pages", record__parse_mmap_pages), + OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", + "Minimal number of bytes that is extracted from mmap data pages (default: 1)", + record__mmap_flush_parse), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, @@ -2224,6 +2276,7 @@ int cmd_record(int argc, const char **argv) pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); + pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); err = __cmd_record(&record, argc, argv); out: diff --git a/tools/perf/perf.h b/tools/perf/perf.h index c59743def8d36..369eae61068de 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -85,6 +85,7 @@ struct record_opts { u64 clockid_res_ns; int nr_cblocks; int affinity; + int mmap_flush; }; enum perf_affinity { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6689378ee577c..f2bbae38278d0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1009,7 +1009,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks, int affinity) + bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1019,7 +1019,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. 
	 */
-	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
+	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };

 	if (!evlist->mmap)
 		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1051,7 +1051,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,

 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
+	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
 }

 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 6a94785b91007..c9a0f72677fd4 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -177,7 +177,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);

 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite, int nr_cblocks, int affinity);
+			 bool auxtrace_overwrite, int nr_cblocks,
+			 int affinity, int flush);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdc7740fc1819..ef3d79b2c90b3 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -440,6 +440,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c

 	perf_mmap__setup_affinity_mask(map, mp);

+	map->flush = mp->flush;
+
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
 				&mp->auxtrace_mp, map->base, fd))
 		return -1;
@@ -492,7 +494,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
 	md->start = md->overwrite ? head : old;
 	md->end = md->overwrite ? old : head;

-	if (md->start == md->end)
+	if ((md->end - md->start) < md->flush)
 		return -EAGAIN;

 	size = md->end - md->start;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e566c19b242b6..b82f8c2d55c47 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -39,6 +39,7 @@ struct perf_mmap {
 	} aio;
 #endif
 	cpu_set_t	affinity_mask;
+	u64		flush;
 };

 /*
@@ -70,7 +71,7 @@ enum bkw_mmap_state {
 };

 struct mmap_params {
-	int prot, mask, nr_cblocks, affinity;
+	int prot, mask, nr_cblocks, affinity, flush;
 	struct auxtrace_mmap_params auxtrace_mp;
 };
-- 
GitLab

From fd5500989c8f3c3944ac0a144be04bae2506f7ba Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 14 Mar 2019 08:03:48 -0700
Subject: [PATCH 0433/1861] perf vendor events intel: Update metrics from TMAM 3.5

Update all the Intel JSON metrics from Ahmad Yasin's TMAM 3.5 for Intel
big core from Sandy Bridge to Cascade Lake. This has many improvements
and new metrics:

- New TopDownL1_SMT group that provides a per SMT thread version of
  --topdown that does not require -a anymore. The drawback is increased
  multiplexing though, since L1 TopDown does not fit into 4 generic
  counters anymore.
- Added SMT aware versions of other metrics.
- Split SMT aware metrics into separate metrics to avoid unnecessary
  event collections.
- New metrics for better branch analysis: Estimated
  Branch_Mispredict_Costs, Instructions per taken Branch, Branch
  Instructions per Taken Branch, etc.
- Instruction mix metrics: Instructions per load, Instructions per
  store, Instructions per Branch, Instructions per Call.
- New Cache metrics: Bandwidth to L1/L2/L3 caches, L1/L2/L3 misses per
  kilo instructions,
memory level parallelism - New memory controller metrics: Normalized memory bandwidth in interval mode, Average memory latency, Average number of parallel read requests, - 3DXP persistent memory metrics for Cascade Lake: 3dxp read latency, 3dxp read/write bandwidth - Some other useful metrics like Instruction Level Parallelism, - Various other improvements. Not all metrics are available on all CPUs. Skylake has best coverage. Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/broadwell/bdw-metrics.json | 260 ++++++++++++--- .../arch/x86/broadwellx/bdx-metrics.json | 278 +++++++++++++--- .../arch/x86/cascadelakex/clx-metrics.json | 304 +++++++++++++++--- .../arch/x86/haswell/hsw-metrics.json | 234 ++++++++++++-- .../arch/x86/haswellx/hsx-metrics.json | 252 +++++++++++++-- .../arch/x86/ivybridge/ivb-metrics.json | 250 +++++++++++--- .../arch/x86/ivytown/ivt-metrics.json | 256 ++++++++++++--- .../arch/x86/jaketown/jkt-metrics.json | 150 +++++++-- .../arch/x86/sandybridge/snb-metrics.json | 144 +++++++-- .../arch/x86/skylake/skl-metrics.json | 274 +++++++++++++--- .../arch/x86/skylakex/skx-metrics.json | 304 +++++++++++++++--- 11 files changed, 2321 insertions(+), 385 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json index 00bfdb5c5acb3..212b117a8ffb1 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -1,164 +1,352 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. 
bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. 
Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. ", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 
uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" }, { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - 
"MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * 
MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" 
}, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json index 5a7f1ec242004..c6f9762f32c06 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -1,164 +1,370 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to mispredicted branches is categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Bad_Speculation"
+    },
+    {
+        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to mispredicted branches is categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Bad_Speculation_SMT"
+    },
+    {
+        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
+        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Backend_Bound"
+    },
+    {
+        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Backend_Bound_SMT"
+    },
+    {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided.",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Retiring"
+    },
+    {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Retiring_SMT"
+    },
+    {
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
         "MetricGroup": "TopDownL1",
         "MetricName": "IPC"
     },
     {
-        "BriefDescription": "Uops Per Instruction",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
-        "MetricGroup": "Pipeline",
+        "BriefDescription": "Uops Per Instruction",
+        "MetricGroup": "Pipeline;Retiring",
         "MetricName": "UPI"
     },
     {
-        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Instruction per taken branch",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "IpTB"
+    },
+    {
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Branch instructions per taken branch.",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "BpTB"
+    },
+    {
         "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
-        "MetricGroup": "Frontend",
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculative fetches) consumed by program instructions",
+        "MetricGroup": "PGO",
         "MetricName": "IFetch_Line_Utilization"
     },
     {
-        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
-        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
-        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricGroup": "DSB;Frontend_Bandwidth",
         "MetricName": "DSB_Coverage"
     },
     {
-        "BriefDescription": "Cycles Per Instruction (threaded)",
         "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "BriefDescription": "Cycles Per Instruction (threaded)",
         "MetricGroup": "Pipeline;Summary",
         "MetricName": "CPI"
     },
     {
-        "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 
uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (12 * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - 
"MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7 * ( DTLB_STORE_MISSES.WALK_COMPLETED + DTLB_LOAD_MISSES.WALK_COMPLETED + ITLB_MISSES.WALK_COMPLETED ) ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired 
demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x35\\,umask\\=0x3\\,filter_opc\\=0x182@ ) / ( cbox_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. 
Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 71e9737f4614d..1a1a3501180ab 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -1,164 +1,394 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. 
For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. 
issued uops that eventually get retired",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Retiring"
+    },
+    {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Retiring_SMT"
+    },
+    {
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
         "MetricGroup": "TopDownL1",
         "MetricName": "IPC"
     },
     {
-        "BriefDescription": "Uops Per Instruction",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
-        "MetricGroup": "Pipeline",
+        "BriefDescription": "Uops Per Instruction",
+        "MetricGroup": "Pipeline;Retiring",
         "MetricName": "UPI"
     },
     {
-        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
-        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
-        "MetricGroup": "Frontend",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Instruction per taken branch",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "IpTB"
+    },
+    {
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Branch instructions per taken branch.",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "BpTB"
+    },
+    {
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculative fetches) consumed by program instructions",
+        "MetricGroup": "PGO",
         "MetricName": "IFetch_Line_Utilization"
     },
     {
-        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
-        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
-        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ))",
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricGroup": "DSB;Frontend_Bandwidth",
         "MetricName": "DSB_Coverage"
     },
     {
-        "BriefDescription": "Cycles Per Instruction (threaded)",
         "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "BriefDescription": "Cycles Per Instruction (threaded)",
         "MetricGroup": "Pipeline;Summary",
         "MetricName": "CPI"
     },
     {
-        "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( 
UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" }, { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss 
demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Access_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction 
for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 1 == 1 else 0 else 0", + "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. 
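
The cache- and memory-bandwidth expressions in this hunk (DRAM_BW_Use, the *_Cache_Fill_BW entries) all follow one pattern: each event counts 64-byte cache lines, so bytes are 64 times the event total, scaled to GB over the wall-clock duration. A minimal sketch of that arithmetic, with invented counter totals (the function name and all values are ours, not from the patch):

    CACHE_LINE = 64  # bytes per transferred cache line

    def bw_gb_per_s(line_events: float, duration_s: float) -> float:
        """64 bytes per event, reported in GB/sec as the JSON expressions do."""
        return CACHE_LINE * line_events / 1e9 / duration_s

    duration_time = 2.0                         # seconds measured (hypothetical)
    cas_count_read, cas_count_write = 5e8, 2e8  # uncore IMC CAS events (hypothetical)
    l1d_replacement = 9e8                       # L1D.REPLACEMENT (hypothetical)

    print(f"DRAM_BW_Use       = {bw_gb_per_s(cas_count_read + cas_count_write, duration_time):.1f} GB/s")  # 22.4
    print(f"L1D_Cache_Fill_BW = {bw_gb_per_s(l1d_replacement, duration_time):.1f} GB/s")                   # 28.8
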
Accounts for demand loads and L1/L2 data-read prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "MEM_PMM_Read_Latency" + }, + { + "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 1 == 1 else 0 else 0", + "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "PMM_Read_BW" + }, + { + "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 1 == 1 else 0 else 0", + "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "PMM_Write_BW" + }, + { + "MetricExpr": "cha_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json index 5ab5c78fe5805..21b27488b6214 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -1,158 +1,322 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. 
bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
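
The *_SMT variants in this hunk all divide by the same denominator, ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ): halve the per-thread clock count, then scale back up by the fraction of reference cycles where only one sibling was running. A sketch of that core-clock estimate under hypothetical counter values (the helper names are ours):

    def core_clks(thread_clks: float, one_thread_active: float, ref_xclk: float) -> float:
        """SMT-aware core clocks, mirroring the JSON denominator."""
        return (thread_clks / 2) * (1 + one_thread_active / ref_xclk)

    def frontend_bound_smt(idq_uops_not_delivered: float, thread_clks: float,
                           one_thread_active: float, ref_xclk: float) -> float:
        # Frontend_Bound_SMT = IDQ_UOPS_NOT_DELIVERED.CORE / (4 * core clocks)
        return idq_uops_not_delivered / (4 * core_clks(thread_clks,
                                                       one_thread_active, ref_xclk))

    print(frontend_bound_smt(1.2e9, 4e9, 5e8, 1e9))  # -> 0.10
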
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. 
issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. ", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
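
By construction, the four TopdownL1 fractions defined in this hunk sum to 1: Backend_Bound is literally 1 minus the other three. A minimal worked example with hypothetical raw counts over one measurement interval (all names and values ours):

    def topdown_l1(uops_issued, uops_retired_slots, recovery_cycles,
                   idq_uops_not_delivered, cycles):
        slots = 4 * cycles                      # 4 issue slots per cycle
        frontend_bound = idq_uops_not_delivered / slots
        bad_speculation = (uops_issued - uops_retired_slots
                           + 4 * recovery_cycles) / slots
        retiring = uops_retired_slots / slots
        backend_bound = 1 - (frontend_bound + bad_speculation + retiring)
        return frontend_bound, bad_speculation, retiring, backend_bound

    fe, bs, ret, be = topdown_l1(uops_issued=9e9, uops_retired_slots=8e9,
                                 recovery_cycles=5e7,
                                 idq_uops_not_delivered=2e9, cycles=3e9)
    assert abs(fe + bs + ret + be - 1.0) < 1e-9
    print(f"FE={fe:.2f} BadSpec={bs:.2f} Retiring={ret:.2f} BE={be:.2f}")
    # FE=0.17 BadSpec=0.10 Retiring=0.67 BE=0.07

Defining Backend_Bound as the remainder is what keeps the level-1 breakdown an exact partition of the issue slots.
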
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch 
Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": 
"1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - 
"BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json index 5ab5c78fe5805..e5aac148c9419 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -1,158 +1,340 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. 
Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
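
The instruction-mix ratios added in this hunk (IPC, UPI, IpTB, BpTB, and later IpL/IpS/IpB/IpCall) are each a single quotient of two counters. A sketch with hypothetical counts, showing how they relate:

    inst_retired   = 8.0e9   # INST_RETIRED.ANY
    cycles         = 4.0e9   # CPU_CLK_UNHALTED.THREAD
    uops_retired   = 9.6e9   # UOPS_RETIRED.RETIRE_SLOTS
    branches       = 1.6e9   # BR_INST_RETIRED.ALL_BRANCHES
    taken_branches = 0.8e9   # BR_INST_RETIRED.NEAR_TAKEN

    print(f"IPC  = {inst_retired / cycles:.2f}")          # 2.00 instructions/cycle
    print(f"UPI  = {uops_retired / inst_retired:.2f}")    # 1.20 uops/instruction
    print(f"IpTB = {inst_retired / taken_branches:.1f}")  # 10.0 instructions/taken branch
    print(f"BpTB = {branches / taken_branches:.1f}")      # 2.0 branches/taken branch
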
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", - "MetricGroup": "Frontend", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per 
physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - 
CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x35\\,umask\\=0x3\\,filter_opc\\=0x182@ ) / ( cbox_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json 
b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json index 7c2679514efb4..bc4d5fc284a00 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -1,164 +1,340 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. 
Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. 
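
The DRAM_Read_Latency expression added for this platform is Little's law applied to the uncore: average queue occupancy divided by the insert rate yields average latency, converted to nanoseconds via the uncore clock frequency. A sketch with invented uncore counts (event encodings as in the hunk; values ours):

    occupancy_cycles = 3.0e9  # cbox event 0x36: TOR occupancy, data reads
    inserts          = 2.5e7  # cbox event 0x35: TOR inserts, data reads
    uncore_clocks    = 5.0e9  # cbox_0 fixed-clock ticks over the interval
    duration_s       = 2.0

    uncore_hz = uncore_clocks / duration_s            # uncore frequency
    latency_ns = 1e9 * (occupancy_cycles / inserts) / uncore_hz
    print(f"DRAM_Read_Latency = {latency_ns:.0f} ns")  # 48 ns

The companion DRAM_Parallel_Reads metric is the same occupancy count divided by occupancy cycles with thresh=1, i.e. the average number of reads in flight while any read is outstanding.
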
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": 
"SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + 
FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json index 7c2679514efb4..f3874b5f99953 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -1,164 +1,346 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + 
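The four TopdownL1 expressions repeated across these metric files implement the Top-Down level-1 slot accounting: each core cycle provides four issue slots, and every slot is attributed to exactly one of Frontend Bound, Bad Speculation, Retiring, or Backend Bound, with Backend Bound computed as the remainder. A minimal sketch of that arithmetic in Python, assuming the raw counter totals have already been collected (the function and its parameter names are illustrative only, not part of this patch):

    def topdown_l1(idq_uops_not_delivered, uops_issued, retire_slots,
                   recovery_cycles, cycles):
        """Top-Down level-1 breakdown; mirrors the non-SMT MetricExpr strings."""
        slots = 4.0 * cycles  # these cores issue up to 4 uops per cycle
        frontend_bound = idq_uops_not_delivered / slots
        bad_speculation = (uops_issued - retire_slots
                           + 4.0 * recovery_cycles) / slots
        retiring = retire_slots / slots
        # Whatever remains is, by definition, Backend Bound.
        backend_bound = 1.0 - (frontend_bound + bad_speculation + retiring)
        return frontend_bound, bad_speculation, backend_bound, retiring

The four fractions sum to one by construction, which is why Backend_Bound needs no dedicated counter of its own.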
"MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": 
"SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / cycles", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + 
FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "cbox_0@event\\=0x0@", + "BriefDescription": "Socket actual clocks when any core is active on that socket", + "MetricGroup": "", + "MetricName": "Socket_CLKS" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json index fd7d7c438226b..98c73e430b05b 100644 --- 
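The _SMT variants above differ from their base metrics only in the denominator: instead of per-thread cycles they estimate core clocks from CPU_CLK_UNHALTED.THREAD plus the fraction of reference cycles during which only one hardware thread was active. A sketch of that correction (counter names follow the expressions above; illustrative only, not part of this patch):

    def core_clks_smt(thread_clks, one_thread_active, ref_xclk):
        # Halve the per-thread clock count (both threads tick together),
        # then scale back up by the share of time only one thread ran.
        return (thread_clks / 2.0) * (1.0 + one_thread_active / ref_xclk)

When a single thread is active for the whole interval the ratio is 1, the factor becomes 2, and the expression collapses back to the plain per-thread clock count.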
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index fd7d7c438226b..98c73e430b05b 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -1,140 +1,232 @@
 [
     {
-        "BriefDescription": "Instructions Per Cycle (per logical thread)",
+        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
+        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Frontend_Bound"
+    },
+    {
+        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Frontend_Bound_SMT"
+    },
+    {
+        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Bad_Speculation"
+    },
+    {
+        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Bad_Speculation_SMT"
+    },
+    {
+        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
+        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Backend_Bound"
+    },
+    {
+        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Backend_Bound_SMT"
+    },
+    {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Retiring"
+    },
+    {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
+        "MetricGroup": "TopdownL1_SMT",
+        "MetricName": "Retiring_SMT"
+    },
+    {
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "BriefDescription": "Instructions Per Cycle (per logical thread)",
         "MetricGroup": "TopDownL1",
         "MetricName": "IPC"
     },
     {
-        "BriefDescription": "Uops Per Instruction",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
-        "MetricGroup": "Pipeline",
+        "BriefDescription": "Uops Per Instruction",
+        "MetricGroup": "Pipeline;Retiring",
         "MetricName": "UPI"
     },
     {
-        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
-        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
-        "MetricGroup": "Frontend",
+        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )",
+        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
+        "MetricGroup": "PGO",
         "MetricName": "IFetch_Line_Utilization"
     },
     {
-        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
-        "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
-        "MetricGroup": "DSB; Frontend_Bandwidth",
+        "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricGroup": "DSB;Frontend_Bandwidth",
         "MetricName": "DSB_Coverage"
     },
     {
-        "BriefDescription": "Cycles Per Instruction (threaded)",
         "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+        "BriefDescription": "Cycles Per Instruction (threaded)",
         "MetricGroup": "Pipeline;Summary",
         "MetricName": "CPI"
     },
     {
-        "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "BriefDescription": "Per-thread actual clocks when the logical processor is active.",
         "MetricGroup": "Summary",
         "MetricName": "CLKS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots",
-        "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+        "MetricExpr": "4 * cycles",
+        "BriefDescription": "Total issue-pipeline slots (per core)",
         "MetricGroup": "TopDownL1",
         "MetricName": "SLOTS"
     },
     {
-        "BriefDescription": "Total number of retired Instructions",
+        "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "BriefDescription": "Total issue-pipeline slots (per core)",
+        "MetricGroup": "TopDownL1_SMT",
+        "MetricName": "SLOTS_SMT"
+    },
+    {
         "MetricExpr": "INST_RETIRED.ANY",
+        "BriefDescription": "Total number of retired Instructions",
         "MetricGroup": "Summary",
         "MetricName": "Instructions"
     },
     {
+        "MetricExpr": "INST_RETIRED.ANY / cycles",
         "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
         "MetricGroup": "SMT",
         "MetricName": "CoreIPC"
     },
     {
+        "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricGroup": "SMT",
+        "MetricName": "CoreIPC_SMT"
+    },
+    {
+        "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles",
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricGroup": "FLOPS",
+        "MetricName": "FLOPc"
+    },
+    {
+        "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricGroup": "FLOPS_SMT",
+        "MetricName": "FLOPc_SMT"
+    },
+    {
+        "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
         "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
-        "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
         "MetricGroup": "Pipeline;Ports_Utilization",
         "MetricName": "ILP"
     },
     {
+        "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
         "BriefDescription": "Core actual clocks when any thread is active on the physical core",
-        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "SMT",
         "MetricName": "CORE_CLKS"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "BriefDescription": "Average CPU Utilization",
         "MetricGroup": "Summary",
         "MetricName": "CPU_Utilization"
     },
     {
+        "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time",
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
         "MetricGroup": "FLOPS;Summary",
         "MetricName": "GFLOPs"
     },
     {
-        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricGroup": "Power",
         "MetricName": "Turbo_Utilization"
     },
     {
-        "BriefDescription": "Fraction of cycles where both hardware threads were active",
         "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+        "BriefDescription": "Fraction of cycles where both hardware threads were active",
         "MetricGroup": "SMT;Summary",
         "MetricName": "SMT_2T_Utilization"
     },
     {
-        "BriefDescription": "Fraction of cycles spent in Kernel mode",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
     {
-        "BriefDescription": "C3 residency percent per core",
+        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+        "MetricGroup": "Memory_BW",
+        "MetricName": "DRAM_BW_Use"
+    },
+    {
+        "MetricExpr": "cbox_0@event\\=0x0@",
+        "BriefDescription": "Socket actual clocks when any core is active on that socket",
+        "MetricGroup": "",
+        "MetricName": "Socket_CLKS"
+    },
+    {
         "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C3 residency percent per core",
         "MetricName": "C3_Core_Residency"
     },
     {
-        "BriefDescription": "C6 residency percent per core",
         "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C6 residency percent per core",
         "MetricName": "C6_Core_Residency"
     },
     {
-        "BriefDescription": "C7 residency percent per core",
         "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C7 residency percent per core",
         "MetricName": "C7_Core_Residency"
     },
     {
-        "BriefDescription": "C2 residency percent per package",
         "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C2 residency percent per package",
         "MetricName": "C2_Pkg_Residency"
     },
     {
-        "BriefDescription": "C3 residency percent per package",
         "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C3 residency percent per package",
         "MetricName": "C3_Pkg_Residency"
     },
     {
-        "BriefDescription": "C6 residency percent per package",
         "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C6 residency percent per package",
         "MetricName": "C6_Pkg_Residency"
     },
     {
-        "BriefDescription": "C7 residency percent per package",
         "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
         "MetricGroup": "Power",
+        "BriefDescription": "C7 residency percent per package",
         "MetricName": "C7_Pkg_Residency"
     }
 ]
Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", - "MetricGroup": "Frontend", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Average CPU Utilization", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + 
FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git 
a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index 71e9737f4614d..2c95417a4dae1 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -1,164 +1,364 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. 
Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ))", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + 
"MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" }, { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" + 
}, + { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Access_BW" + }, + { + "MetricExpr": "1000 * 
MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to 
external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per core", "MetricName": "C3_Core_Residency" }, { - "BriefDescription": "C6 residency percent per core", "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per core", "MetricName": "C6_Core_Residency" }, { - "BriefDescription": "C7 residency percent per core", "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per core", "MetricName": "C7_Core_Residency" }, { - "BriefDescription": "C2 residency percent per package", "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C2 residency percent per package", "MetricName": "C2_Pkg_Residency" }, { - "BriefDescription": "C3 residency percent per package", "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C3 residency percent per package", "MetricName": "C3_Pkg_Residency" }, { - "BriefDescription": "C6 residency percent per package", "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C6 residency percent per package", "MetricName": "C6_Pkg_Residency" }, { - "BriefDescription": "C7 residency percent per package", "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", + "BriefDescription": "C7 residency percent per package", "MetricName": "C7_Pkg_Residency" } ] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 71e9737f4614d..56e03ba771f48 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1,164 +1,394 @@ [ { - "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Frontend_Bound" + }, + { + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. 
Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Frontend_Bound_SMT" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", + "MetricGroup": "TopdownL1", + "MetricName": "Bad_Speculation" + }, + { + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Bad_Speculation_SMT" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. 
Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricGroup": "TopdownL1", + "MetricName": "Backend_Bound" + }, + { + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Backend_Bound_SMT" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. ", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", + "MetricGroup": "TopdownL1", + "MetricName": "Retiring" + }, + { + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. 
SMT version; use when SMT is enabled and measuring per logical CPU.", + "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", + "MetricGroup": "TopdownL1_SMT", + "MetricName": "Retiring_SMT" + }, + { "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Instructions Per Cycle (per logical thread)", "MetricGroup": "TopDownL1", "MetricName": "IPC" }, { - "BriefDescription": "Uops Per Instruction", "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", - "MetricGroup": "Pipeline", + "BriefDescription": "Uops Per Instruction", + "MetricGroup": "Pipeline;Retiring", "MetricName": "UPI" }, { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", - "MetricGroup": "Frontend", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Instruction per taken branch", + "MetricGroup": "Branches;PGO", + "MetricName": "IpTB" + }, + { + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "BriefDescription": "Branch instructions per taken branch. ", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTB" + }, + { + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", + "MetricGroup": "PGO", "MetricName": "IFetch_Line_Utilization" }, { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", - "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ))", + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricGroup": "DSB;Frontend_Bandwidth", "MetricName": "DSB_Coverage" }, { - "BriefDescription": "Cycles Per Instruction (threaded)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", + "BriefDescription": "Cycles Per Instruction (threaded)", "MetricGroup": "Pipeline;Summary", "MetricName": "CPI" }, { - "BriefDescription": "Per-thread actual clocks when the logical processor is active. 
This is called 'Clockticks' in VTune.", "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "BriefDescription": "Per-thread actual clocks when the logical processor is active.", "MetricGroup": "Summary", "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots", - "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricExpr": "4 * cycles", + "BriefDescription": "Total issue-pipeline slots (per core)", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per core)", + "MetricGroup": "TopDownL1_SMT", + "MetricName": "SLOTS_SMT" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "BriefDescription": "Instructions per Load (lower number means loads are more frequent)", + "MetricGroup": "Instruction_Type;L1_Bound", + "MetricName": "IpL" + }, + { + "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "BriefDescription": "Instructions per Store", + "MetricGroup": "Instruction_Type;Store_Bound", + "MetricName": "IpS" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "BriefDescription": "Instructions per Branch", + "MetricGroup": "Branches;Instruction_Type;Port_5;Port_6", + "MetricName": "IpB" + }, + { + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "BriefDescription": "Instruction per (near) call", + "MetricGroup": "Branches", + "MetricName": "IpCall" + }, + { "MetricExpr": "INST_RETIRED.ANY", + "BriefDescription": "Total number of retired Instructions", "MetricGroup": "Summary", "MetricName": "Instructions" }, { + "MetricExpr": "INST_RETIRED.ANY / cycles", "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", "MetricGroup": "SMT", "MetricName": "CoreIPC" }, { + "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricGroup": "SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS", + "MetricName": "FLOPc" + }, + { + "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricGroup": "FLOPS_SMT", + "MetricName": "FLOPc_SMT" + }, + { + "MetricExpr": "UOPS_EXECUTED.THREAD / (( 
UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { - "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", - "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)", - "MetricGroup": "Unknown_Branches", - "MetricName": "BAClear_Cost" + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "Branch_Misprediction_Cost" + }, + { + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per branch misprediction (jeclear and baclear)", + "MetricGroup": "Branch_Mispredicts_SMT", + "MetricName": "Branch_Misprediction_Cost_SMT" }, { + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricGroup": "Branch_Mispredicts", + "MetricName": "IpMispredict" + }, + { + "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "BriefDescription": "Core actual clocks when any thread is active on the physical core", - "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", "MetricGroup": "SMT", "MetricName": "CORE_CLKS" }, { - "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, { - "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss 
demand load when there is at least 1 such miss)", - "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-thread)", "MetricGroup": "Memory_Bound;Memory_BW", "MetricName": "MLP" }, { + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )", "MetricGroup": "TLB", "MetricName": "Page_Walks_Utilization" }, { - "BriefDescription": "Average CPU Utilization", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricGroup": "TLB_SMT", + "MetricName": "Page_Walks_Utilization_SMT" + }, + { + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "L3_Cache_Access_BW" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI_All" + }, + { + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2HPKI_All" + }, + { + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", + "BriefDescription": "L3 cache true misses per kilo instruction 
for retired demand loads", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "BriefDescription": "Average CPU Utilization", "MetricGroup": "Summary", "MetricName": "CPU_Utilization" }, { + "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, { - "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Average Frequency Utilization relative nominal frequency", "MetricGroup": "Power", "MetricName": "Turbo_Utilization" }, { - "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "BriefDescription": "Fraction of cycles where both hardware threads were active", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "BriefDescription": "Fraction of cycles spent in Kernel mode", "MetricGroup": "Summary", "MetricName": "Kernel_Utilization" }, { - "BriefDescription": "C3 residency percent per core", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_BW_Use" + }, + { + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_Lat", + "MetricName": "DRAM_Read_Latency" + }, + { + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", + "MetricGroup": "Memory_BW", + "MetricName": "DRAM_Parallel_Reads" + }, + { + "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0", + "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. 
Accounts for demand loads and L1/L2 data-read prefetches",
+ "MetricGroup": "Memory_Lat",
+ "MetricName": "MEM_PMM_Read_Latency"
+ },
+ {
+ "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricGroup": "Memory_BW",
+ "MetricName": "PMM_Read_BW"
+ },
+ {
+ "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricGroup": "Memory_BW",
+ "MetricName": "PMM_Write_BW"
+ },
+ {
+ "MetricExpr": "cha_0@event\\=0x0@",
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricGroup": "",
+ "MetricName": "Socket_CLKS"
+ },
+ {
 "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C3 residency percent per core",
 "MetricName": "C3_Core_Residency"
 },
 {
- "BriefDescription": "C6 residency percent per core",
 "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C6 residency percent per core",
 "MetricName": "C6_Core_Residency"
 },
 {
- "BriefDescription": "C7 residency percent per core",
 "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C7 residency percent per core",
 "MetricName": "C7_Core_Residency"
 },
 {
- "BriefDescription": "C2 residency percent per package",
 "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C2 residency percent per package",
 "MetricName": "C2_Pkg_Residency"
 },
 {
- "BriefDescription": "C3 residency percent per package",
 "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C3 residency percent per package",
 "MetricName": "C3_Pkg_Residency"
 },
 {
- "BriefDescription": "C6 residency percent per package",
 "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C6 residency percent per package",
 "MetricName": "C6_Pkg_Residency"
 },
 {
- "BriefDescription": "C7 residency percent per package",
 "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
 "MetricGroup": "Power",
+ "BriefDescription": "C7 residency percent per package",
 "MetricName": "C7_Pkg_Residency"
 }
 ]
-- GitLab
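As context for the metric definitions added above: each MetricExpr string is an arithmetic formula over raw event counts that perf evaluates at the end of a run (on kernels carrying these files, an invocation along the lines of perf stat -M DRAM_BW_Use prints the computed value). Below is a minimal Python sketch, not part of any patch here, that re-derives two of the formulas above; the function names and the event counts are invented purely for illustration.

    # Hypothetical sketch: evaluating two MetricExpr formulas from the JSON
    # above with made-up event counts (real counts would come from perf).

    def core_clks_smt(clk_unhalted_thread, one_thread_active, ref_xclk):
        # CORE_CLKS with SMT enabled, per the MetricExpr above:
        # ( CPU_CLK_UNHALTED.THREAD / 2 ) *
        # ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )
        return (clk_unhalted_thread / 2) * (1 + one_thread_active / ref_xclk)

    def dram_bw_use(cas_count_read, cas_count_write, duration_time):
        # DRAM_BW_Use: 64 bytes per DRAM CAS transaction, scaled to GB/s:
        # ( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ )
        #   / 1000000000 ) / duration_time
        return 64 * (cas_count_read + cas_count_write) / 1e9 / duration_time

    print(core_clks_smt(2_000_000_000, 400_000_000, 500_000_000))  # -> 1800000000.0
    print(dram_bw_use(50_000_000, 20_000_000, 1.0))                # -> 4.48 (GB/s)

The first formula scales per-thread clocks up to core clocks using the fraction of time only one hardware thread was active; the second is the arithmetic the DRAM_BW_Use entry encodes against the uncore IMC CAS counters.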
From 8313fe2d685da168b732421f85714cfd702d2141 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 14 Mar 2019 08:36:31 -0700
Subject: [PATCH 0434/1861] perf vendor events intel: Update Broadwell events to v23

Signed-off-by: Andi Kleen
Cc: Kan Liang
Cc: Jiri Olsa
Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../pmu-events/arch/x86/broadwell/cache.json | 1630 ++++++++--------
 .../arch/x86/broadwell/floating-point.json | 51 +-
 .../arch/x86/broadwell/frontend.json | 4 +-
 .../pmu-events/arch/x86/broadwell/memory.json | 1640 ++++++++---------
 .../arch/x86/broadwell/pipeline.json | 36 +-
 5 files changed, 1685 insertions(+), 1676 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
index 0b080b0352d84..7938bf5689aba 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
@@ -56,10 +56,10 @@
 "CounterHTOff": "0,1,2,3,4,5,6,7"
 },
 {
- "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.",
+ "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache.",
 "EventCode": "0x24",
 "Counter": "0,1,2,3",
- "UMask": "0x41",
+ "UMask": "0xc1",
 "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
 "SampleAfterValue": "200003",
 "BriefDescription": "Demand Data Read requests that hit L2 cache",
@@ -68,7 +68,7 @@
 {
 "EventCode": "0x24",
 "Counter": "0,1,2,3",
- "UMask": "0x42",
+ "UMask": "0xc2",
 "EventName": "L2_RQSTS.RFO_HIT",
 "SampleAfterValue": "200003",
 "BriefDescription": "RFO requests that hit L2 cache.",
@@ -77,7 +77,7 @@
 {
 "EventCode": "0x24",
 "Counter": "0,1,2,3",
- "UMask": "0x44",
+ "UMask": "0xc4",
 "EventName": "L2_RQSTS.CODE_RD_HIT",
 "SampleAfterValue": "200003",
 "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
@@ -87,7 +87,7 @@
 "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
 "EventCode": "0x24",
 "Counter": "0,1,2,3",
- "UMask": "0x50",
+ "UMask": "0xd0",
 "EventName": "L2_RQSTS.L2_PF_HIT",
 "SampleAfterValue": "200003",
 "BriefDescription": "L2 prefetch requests that hit L2 cache",
@@ -433,7 +433,7 @@
 },
 {
 "PEBS": "1",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
 "EventCode": "0xD0",
 "Counter": "0,1,2,3",
 "UMask": "0x41",
@@ -445,7 +445,7 @@
 },
 {
 "PEBS": "1",
- "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
 "EventCode": "0xD0",
 "Counter": "0,1,2,3",
 "UMask": "0x42",
@@ -771,2628 +771,2628 @@
 "CounterHTOff": "0,1,2,3,4,5,6,7"
 },
 {
- "PublicDescription": "Counts demand data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010001 ", + "MSRValue": "0x0000010001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that have any response type.", + "BriefDescription": "Counts demand data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020001 ", + "MSRValue": "0x0080020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020001 ", + "MSRValue": "0x0100020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020001 ", + "MSRValue": "0x0200020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020001 ", + "MSRValue": "0x0400020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020001 ", + "MSRValue": "0x1000020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020001 ", + "MSRValue": "0x3F80020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0001 ", + "MSRValue": "0x00803C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0001 ", + "MSRValue": "0x01003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0001 ", + "MSRValue": "0x02003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0001 ", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001 ", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0001 ", + "MSRValue": "0x3F803C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010002 ", + "MSRValue": "0x0000010002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", + "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0002 ", + "MSRValue": "0x00803C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0002 ", + "MSRValue": "0x01003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0002 ", + "MSRValue": "0x02003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0002 ", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002 ", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0002 ", + "MSRValue": "0x3F803C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010004 ", + "MSRValue": "0x0000010004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that have any response type.", + "BriefDescription": "Counts all demand code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020004 ", + "MSRValue": "0x0080020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020004 ", + "MSRValue": "0x0100020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020004 ", + "MSRValue": "0x0200020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020004 ", + "MSRValue": "0x0400020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020004 ", + "MSRValue": "0x1000020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020004 ", + "MSRValue": "0x3F80020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0004 ", + "MSRValue": "0x00803C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0004 ", + "MSRValue": "0x01003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0004 ", + "MSRValue": "0x02003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0004 ", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0004 ", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0004 ", + "MSRValue": "0x3F803C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3.", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive) have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010008 ", + "MSRValue": "0x0000010008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that have any response type.", + "BriefDescription": "Counts writebacks (modified to exclusive) have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020008 ", + "MSRValue": "0x0080020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020008 ", + "MSRValue": "0x0100020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 
0xBB", - "MSRValue": "0x0200020008 ", + "MSRValue": "0x0200020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020008 ", + "MSRValue": "0x0400020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020008 ", + "MSRValue": "0x1000020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020008 ", + "MSRValue": "0x3F80020008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0008 ", + "MSRValue": "0x00803C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0008 ", + "MSRValue": "0x01003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0008 ", + "MSRValue": "0x02003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0008 ", + "MSRValue": "0x04003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0008 ", + "MSRValue": "0x10003C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "COREWB & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts writebacks (modified to exclusive)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0008 ", + "MSRValue": "0x3F803C0008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3.", + "BriefDescription": "Counts writebacks (modified to exclusive)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010010 ", + "MSRValue": "0x0000010010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020010 ", + "MSRValue": "0x0080020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020010 ", + "MSRValue": "0x0100020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020010 ", + "MSRValue": "0x0200020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020010 ", + "MSRValue": "0x0400020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020010 ", + "MSRValue": "0x1000020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020010 ", + "MSRValue": "0x3F80020010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0010 ", + "MSRValue": "0x00803C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0010 ", + "MSRValue": "0x01003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0010 ", + "MSRValue": "0x02003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0010 ", + "MSRValue": "0x04003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0010 ", + "MSRValue": "0x10003C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0010 ", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010020 ", + "MSRValue": "0x0000010020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020020 ", + "MSRValue": "0x0080020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020020 ", + "MSRValue": "0x0100020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020020 ", + "MSRValue": "0x0200020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to 
L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020020 ", + "MSRValue": "0x0400020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020020 ", + "MSRValue": "0x1000020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020020 ", + "MSRValue": "0x3F80020020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0020 ", + "MSRValue": "0x00803C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0020 ", + "MSRValue": "0x01003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0020 ", + "MSRValue": "0x02003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0020 ", + "MSRValue": "0x04003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0020 ", + "MSRValue": "0x10003C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0020 ", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010040 ", + "MSRValue": "0x0000010040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020040 ", + "MSRValue": "0x0080020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020040 ", + "MSRValue": "0x0100020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020040 ", + "MSRValue": "0x0200020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit 
value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020040 ", + "MSRValue": "0x0400020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020040 ", + "MSRValue": "0x1000020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020040 ", + "MSRValue": "0x3F80020040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0040 ", + "MSRValue": "0x00803C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0040 ", + "MSRValue": "0x01003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0040 ", + "MSRValue": "0x02003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0040 ", + "MSRValue": "0x04003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0040 ", + "MSRValue": "0x10003C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L2_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0040 ", + "MSRValue": "0x3F803C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010080 ", + "MSRValue": "0x0000010080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020080 ", + "MSRValue": "0x0080020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020080 ", + "MSRValue": "0x0100020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020080 ", + "MSRValue": "0x0200020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit 
value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020080 ", + "MSRValue": "0x0400020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020080 ", + "MSRValue": "0x1000020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020080 ", + "MSRValue": "0x3F80020080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0080 ", + "MSRValue": "0x00803C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0080 ", + "MSRValue": "0x01003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0080 ", + "MSRValue": "0x02003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0080 ", + "MSRValue": "0x04003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0080 ", + "MSRValue": "0x10003C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0080 ", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010100 ", + "MSRValue": "0x0000010100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020100 ", + "MSRValue": "0x0080020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020100 ", + "MSRValue": "0x0100020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020100 ", + "MSRValue": "0x0200020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020100 ", + "MSRValue": "0x0400020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020100 ", + "MSRValue": "0x1000020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020100 ", + "MSRValue": "0x3F80020100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0100 ", + "MSRValue": "0x00803C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0100 ", + "MSRValue": "0x01003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0100 ", + "MSRValue": "0x02003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0100 ", + "MSRValue": "0x04003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0100 ", + "MSRValue": "0x10003C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0100 ", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010200 ", + "MSRValue": "0x0000010200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020200 ", + "MSRValue": "0x0080020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020200 ", + "MSRValue": "0x0100020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020200 ", + "MSRValue": "0x0200020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify 
attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020200 ", + "MSRValue": "0x0400020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020200 ", + "MSRValue": "0x1000020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020200 ", + "MSRValue": "0x3F80020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0200 ", + "MSRValue": "0x00803C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0200 ", + "MSRValue": "0x01003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0200 ", + "MSRValue": "0x02003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0200 ", + "MSRValue": "0x04003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0200 ", + "MSRValue": "0x10003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0200 ", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests have any response type.",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0000018000 ",
+ "MSRValue": "0x0000018000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts any other requests that have any response type.",
+ "BriefDescription": "Counts any other requests have any response type.",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0080028000 ",
+ "MSRValue": "0x0080028000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0100028000 ",
+ "MSRValue": "0x0100028000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0200028000 ",
+ "MSRValue": "0x0200028000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0400028000 ",
+ "MSRValue": "0x0400028000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1000028000 ",
+ "MSRValue": "0x1000028000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HITM",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f80028000 ",
+ "MSRValue": "0x3F80028000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "OTHER & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00803c8000 ",
+ "MSRValue": "0x00803C8000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c8000 ", + "MSRValue": "0x01003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c8000 ", + "MSRValue": "0x02003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c8000 ", + "MSRValue": "0x04003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c8000 ", + "MSRValue": "0x10003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts any other requests",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f803c8000 ",
+ "MSRValue": "0x3F803C8000",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts any other requests that hit in the L3.",
+ "BriefDescription": "Counts any other requests",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads have any response type.",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0000010090 ",
+ "MSRValue": "0x0000010090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch data reads that have any response type.",
+ "BriefDescription": "Counts all prefetch data reads have any response type.",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0080020090 ",
+ "MSRValue": "0x0080020090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0100020090 ",
+ "MSRValue": "0x0100020090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0200020090 ",
+ "MSRValue": "0x0200020090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0400020090 ",
+ "MSRValue": "0x0400020090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1000020090 ",
+ "MSRValue": "0x1000020090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "BriefDescription": "Counts all prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f80020090 ",
+ "MSRValue": "0x3F80020090",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information.
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0090 ", + "MSRValue": "0x00803C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0090 ", + "MSRValue": "0x01003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0090 ", + "MSRValue": "0x02003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0090 ", + "MSRValue": "0x04003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0090 ", + "MSRValue": "0x10003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0090 ", + "MSRValue": "0x3F803C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010120 ", + "MSRValue": "0x0000010120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that have any response type.", + "BriefDescription": "Counts prefetch RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020120 ", + "MSRValue": "0x0080020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020120 ", + "MSRValue": "0x0100020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020120 ", + "MSRValue": "0x0200020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020120 ", + "MSRValue": "0x0400020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", 
"SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020120 ", + "MSRValue": "0x1000020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020120 ", + "MSRValue": "0x3F80020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0120 ", + "MSRValue": "0x00803C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0120 ", + "MSRValue": "0x01003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0120 ", + "MSRValue": "0x02003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0120 ", + "MSRValue": "0x04003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0120 ", + "MSRValue": "0x10003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0120 ", + "MSRValue": "0x3F803C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010240 ", + "MSRValue": "0x0000010240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that have any response type.", + "BriefDescription": "Counts all prefetch code reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020240 ", + "MSRValue": "0x0080020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020240 ", + "MSRValue": "0x0100020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020240 ", + "MSRValue": "0x0200020240", "Counter": "0,1,2,3", 
"UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020240 ", + "MSRValue": "0x0400020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020240 ", + "MSRValue": "0x1000020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020240 ", + "MSRValue": "0x3F80020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0240 ", + "MSRValue": "0x00803C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0240 ", + "MSRValue": "0x01003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0240 ", + "MSRValue": "0x02003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0240 ", + "MSRValue": "0x04003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0240 ", + "MSRValue": "0x10003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0240 ", + "MSRValue": "0x3F803C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads have any response type.",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0000010091 ",
+ "MSRValue": "0x0000010091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
+ "BriefDescription": "Counts all demand & prefetch data reads have any response type.",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0080020091 ",
+ "MSRValue": "0x0080020091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0100020091 ",
+ "MSRValue": "0x0100020091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0200020091 ",
+ "MSRValue": "0x0200020091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0400020091 ",
+ "MSRValue": "0x0400020091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1000020091 ",
+ "MSRValue": "0x1000020091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f80020091 ",
+ "MSRValue": "0x3F80020091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand & prefetch data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00803c0091 ",
+ "MSRValue": "0x00803C0091",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts all demand & prefetch data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0091 ", + "MSRValue": "0x01003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0091 ", + "MSRValue": "0x02003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0091 ", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091 ", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0091 ", + "MSRValue": "0x3F803C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010122 ", + "MSRValue": "0x0000010122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", + "BriefDescription": "Counts all demand & prefetch RFOs have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020122 ", + "MSRValue": "0x0080020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020122 ", + "MSRValue": "0x0100020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020122 ", + "MSRValue": "0x0200020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020122 ", + "MSRValue": "0x0400020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020122 ", + "MSRValue": "0x1000020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f80020122 ", + "MSRValue": "0x3F80020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00803c0122 ", + "MSRValue": "0x00803C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01003c0122 ", + "MSRValue": "0x01003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02003c0122 ", + "MSRValue": "0x02003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0122 ", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0122 ", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0122 ", + "MSRValue": "0x3F803C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json index 689d478dae93b..15291239c1285 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json @@ -1,24 +1,26 @@ [ { - "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "BDM30", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", + "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when 
penalty is applicable.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "BDM30", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", + "BriefDescription": "Number of transitions from legacy SSE to AVX-256 when penalty applicable (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -45,7 +47,7 @@ "UMask": "0x3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single precision?)", "CounterHTOff": "0,1,2,3" }, { @@ -54,7 +56,7 @@ "UMask": "0x4", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -63,7 +65,7 @@ "UMask": "0x8", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -72,7 +74,7 @@ "UMask": "0x10", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -81,7 +83,7 @@ "UMask": "0x15", "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", "SampleAfterValue": "2000006", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -90,7 +92,7 @@ "UMask": "0x20", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -99,7 +101,7 @@ "UMask": "0x2a", "EventName": "FP_ARITH_INST_RETIRED.SINGLE", "SampleAfterValue": "2000005", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3" }, { @@ -108,57 +110,62 @@ "UMask": "0x3c", "EventName": "FP_ARITH_INST_RETIRED.PACKED", "SampleAfterValue": "2000004", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single-precision?)", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "FP_ASSIST.X87_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to output value.", + "BriefDescription": "output - Numeric Overflow, Numeric Underflow, Inexact Result (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "FP_ASSIST.X87_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to input value.", + "BriefDescription": "input - Invalid Operation, Denormal Operand, SNaN Operand (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "FP_ASSIST.SIMD_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to Output values", + "BriefDescription": "SSE* FP micro-code assist when output value is invalid. 
(Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts any input SSE* floating-point (FP) assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "FP_ASSIST.SIMD_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to input values", + "BriefDescription": "Any input SSE* FP Assist - (Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "PEBS": "1", + "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1. Uses PEBS.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", "EventName": "FP_ASSIST.ANY", "SampleAfterValue": "100003", - "BriefDescription": "Cycles with any input/output SSE or FP assist", + "BriefDescription": "Counts any FP_ASSIST umask was incrementing (Precise Event)", "CounterMask": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json index 7142c76d7f115..aa4a5d762f212 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json @@ -211,7 +211,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -274,7 +274,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. 
These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/memory.json b/tools/perf/pmu-events/arch/x86/broadwell/memory.json index c9154cebbdf0c..b6b5247d3d5a7 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/memory.json @@ -311,7 +311,7 @@ }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above four.", + "PublicDescription": "Counts randomly selected loads with latency value being above four.", "EventCode": "0xCD", "MSRValue": "0x4", "Counter": "3", @@ -320,13 +320,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Loads with latency value being above 4", + "BriefDescription": "Randomly selected loads with latency value being above 4", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above eight.", + "PublicDescription": "Counts randomly selected loads with latency value being above eight.", "EventCode": "0xCD", "MSRValue": "0x8", "Counter": "3", @@ -335,13 +335,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "SampleAfterValue": "50021", - "BriefDescription": "Loads with latency value being above 8", + "BriefDescription": "Randomly selected loads with latency value being above 8", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 16.", + "PublicDescription": "Counts randomly selected loads with latency value being above 16.", "EventCode": "0xCD", 
"MSRValue": "0x10", "Counter": "3", @@ -350,13 +350,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "SampleAfterValue": "20011", - "BriefDescription": "Loads with latency value being above 16", + "BriefDescription": "Randomly selected loads with latency value being above 16", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 32.", + "PublicDescription": "Counts randomly selected loads with latency value being above 32.", "EventCode": "0xCD", "MSRValue": "0x20", "Counter": "3", @@ -365,13 +365,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "SampleAfterValue": "100007", - "BriefDescription": "Loads with latency value being above 32", + "BriefDescription": "Randomly selected loads with latency value being above 32", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 64.", + "PublicDescription": "Counts randomly selected loads with latency value being above 64.", "EventCode": "0xCD", "MSRValue": "0x40", "Counter": "3", @@ -380,13 +380,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "SampleAfterValue": "2003", - "BriefDescription": "Loads with latency value being above 64", + "BriefDescription": "Randomly selected loads with latency value being above 64", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 128.", + "PublicDescription": "Counts randomly selected loads with latency value being above 128.", "EventCode": "0xCD", "MSRValue": "0x80", "Counter": "3", @@ -395,13 +395,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "SampleAfterValue": "1009", - "BriefDescription": "Loads with latency value being above 128", + "BriefDescription": "Randomly selected loads with latency value being above 128", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 256.", + "PublicDescription": "Counts randomly selected loads with latency value being above 256.", "EventCode": "0xCD", "MSRValue": "0x100", "Counter": "3", @@ -410,13 +410,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "SampleAfterValue": "503", - "BriefDescription": "Loads with latency value being above 256", + "BriefDescription": "Randomly selected loads with latency value being above 256", "TakenAlone": "1", "CounterHTOff": "3" }, { "PEBS": "2", - "PublicDescription": "This event counts loads with latency value being above 512.", + "PublicDescription": "Counts randomly selected loads with latency value being above 512.", "EventCode": "0xCD", "MSRValue": "0x200", "Counter": "3", @@ -425,2620 +425,2620 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "SampleAfterValue": "101", - "BriefDescription": "Loads with latency value being above 512", + "BriefDescription": "Randomly selected loads with latency value being above 512", "TakenAlone": "1", "CounterHTOff": "3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020001 ", + 
"MSRValue": "0x2000020001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0001 ", + "MSRValue": "0x20003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000001 ", + "MSRValue": "0x0084000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000001 ", + "MSRValue": "0x0104000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000001 ", + "MSRValue": "0x0204000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", 
"Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000001 ", + "MSRValue": "0x0404000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000001 ", + "MSRValue": "0x1004000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000001 ", + "MSRValue": "0x2004000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000001 ", + "MSRValue": "0x3F84000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000001 ", + "MSRValue": "0x00BC000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000001 ", + "MSRValue": "0x013C000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000001 ", + "MSRValue": "0x023C000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000001 ", + "MSRValue": "0x043C000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0002 ", + "MSRValue": "0x20003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000002 ", + "MSRValue": "0x3F84000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000002 ", + "MSRValue": "0x00BC000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000002 ", + "MSRValue": "0x013C000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000002 ", + "MSRValue": "0x023C000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs)", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000002 ", + "MSRValue": "0x043C000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs)", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020004 ", + "MSRValue": "0x2000020004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0004 ",
+ "MSRValue": "0x20003C0004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000004 ",
+ "MSRValue": "0x0084000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000004 ",
+ "MSRValue": "0x0104000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000004 ",
+ "MSRValue": "0x0204000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0404000004 ",
+ "MSRValue": "0x0404000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1004000004 ",
+ "MSRValue": "0x1004000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2004000004 ",
+ "MSRValue": "0x2004000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f84000004 ",
+ "MSRValue": "0x3F84000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00bc000004 ",
+ "MSRValue": "0x00BC000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x013c000004 ",
+ "MSRValue": "0x013C000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all demand code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x023c000004 ",
+ "MSRValue": "0x023C000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all demand code reads that miss the L3 with a snoop miss response.",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all demand code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x043c000004 ",
+ "MSRValue": "0x043C000004",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all demand code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2000020008 ",
+ "MSRValue": "0x2000020008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0008 ",
+ "MSRValue": "0x20003C0008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000008 ",
+ "MSRValue": "0x0084000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000008 ",
+ "MSRValue": "0x0104000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000008 ",
+ "MSRValue": "0x0204000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0404000008 ",
+ "MSRValue": "0x0404000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1004000008 ",
+ "MSRValue": "0x1004000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2004000008 ",
+ "MSRValue": "0x2004000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f84000008 ",
+ "MSRValue": "0x3F84000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00bc000008 ",
+ "MSRValue": "0x00BC000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x013c000008 ",
+ "MSRValue": "0x013C000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x023c000008 ",
+ "MSRValue": "0x023C000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response.",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts writebacks (modified to exclusive)",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x043c000008 ",
+ "MSRValue": "0x043C000008",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "COREWB & L3_MISS & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts writebacks (modified to exclusive)",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2000020010 ",
+ "MSRValue": "0x2000020010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0010 ",
+ "MSRValue": "0x20003C0010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000010 ",
+ "MSRValue": "0x0084000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000010 ",
+ "MSRValue": "0x0104000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000010 ",
+ "MSRValue": "0x0204000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0404000010 ",
+ "MSRValue": "0x0404000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1004000010 ",
+ "MSRValue": "0x1004000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2004000010 ",
+ "MSRValue": "0x2004000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f84000010 ",
+ "MSRValue": "0x3F84000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00bc000010 ",
+ "MSRValue": "0x00BC000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x013c000010 ",
+ "MSRValue": "0x013C000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x023c000010 ",
+ "MSRValue": "0x023C000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response.",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x043c000010 ",
+ "MSRValue": "0x043C000010",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2000020020 ",
+ "MSRValue": "0x2000020020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0020 ",
+ "MSRValue": "0x20003C0020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000020 ",
+ "MSRValue": "0x0084000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000020 ",
+ "MSRValue": "0x0104000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000020 ",
+ "MSRValue": "0x0204000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0404000020 ",
+ "MSRValue": "0x0404000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1004000020 ",
+ "MSRValue": "0x1004000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2004000020 ",
+ "MSRValue": "0x2004000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f84000020 ",
+ "MSRValue": "0x3F84000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00bc000020 ",
+ "MSRValue": "0x00BC000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x013c000020 ",
+ "MSRValue": "0x013C000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x023c000020 ",
+ "MSRValue": "0x023C000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response.",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x043c000020 ",
+ "MSRValue": "0x043C000020",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2000020040 ",
+ "MSRValue": "0x2000020040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0040 ",
+ "MSRValue": "0x20003C0040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000040 ",
+ "MSRValue": "0x0084000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000040 ",
+ "MSRValue": "0x0104000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000040 ",
+ "MSRValue": "0x0204000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0404000040 ",
+ "MSRValue": "0x0404000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1004000040 ",
+ "MSRValue": "0x1004000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2004000040 ",
+ "MSRValue": "0x2004000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f84000040 ",
+ "MSRValue": "0x3F84000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00bc000040 ",
+ "MSRValue": "0x00BC000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x013c000040 ",
+ "MSRValue": "0x013C000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x023c000040 ",
+ "MSRValue": "0x023C000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x043c000040 ",
+ "MSRValue": "0x043C000040",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2000020080 ",
+ "MSRValue": "0x2000020080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0080 ",
+ "MSRValue": "0x20003C0080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000080 ",
+ "MSRValue": "0x0084000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000080 ",
+ "MSRValue": "0x0104000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000080 ",
+ "MSRValue": "0x0204000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0404000080 ",
+ "MSRValue": "0x0404000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x1004000080 ",
+ "MSRValue": "0x1004000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2004000080 ",
+ "MSRValue": "0x2004000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x3f84000080 ",
+ "MSRValue": "0x3F84000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x00bc000080 ",
+ "MSRValue": "0x00BC000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x013c000080 ",
+ "MSRValue": "0x013C000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x023c000080 ",
+ "MSRValue": "0x023C000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x043c000080 ",
+ "MSRValue": "0x043C000080",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x2000020100 ",
+ "MSRValue": "0x2000020100",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x20003c0100 ",
+ "MSRValue": "0x20003C0100",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NON_DRAM",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address.",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0084000100 ",
+ "MSRValue": "0x0084000100",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0104000100 ",
+ "MSRValue": "0x0104000100",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"EventCode": "0xB7, 0xBB",
- "MSRValue": "0x0204000100 ",
+ "MSRValue": "0x0204000100",
"Counter": "0,1,2,3",
"UMask": "0x1",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
- "MSRIndex": "0x1a6,0x1a7",
+ "MSRIndex": "0x1a6, 0x1a7",
"SampleAfterValue": "100003",
- "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs",
"Offcore": "1",
"CounterHTOff": "0,1,2,3"
},
{
- "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore
transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000100 ", + "MSRValue": "0x0404000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000100 ", + "MSRValue": "0x1004000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000100 ", + "MSRValue": "0x2004000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000100 ", + "MSRValue": "0x3F84000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000100 ", + "MSRValue": "0x00BC000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000100 ", + "MSRValue": "0x013C000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000100 ", + "MSRValue": "0x023C000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000100 ", + "MSRValue": "0x043C000100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020200 ", + "MSRValue": "0x2000020200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0200 ", + "MSRValue": "0x20003C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000200 ", + "MSRValue": "0x0084000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000200 ", + "MSRValue": "0x0104000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000200 ", + "MSRValue": "0x0204000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000200 ", + "MSRValue": "0x0404000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000200 ", + "MSRValue": "0x1004000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000200 ", + "MSRValue": "0x2004000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000200 ", + "MSRValue": "0x3F84000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000200 ", + "MSRValue": "0x00BC000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000200 ", + "MSRValue": "0x013C000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000200 ", + "MSRValue": "0x023C000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000200 ", + "MSRValue": "0x043C000200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000028000 ", + "MSRValue": "0x2000028000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c8000 ", + "MSRValue": "0x20003C8000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084008000 ", + "MSRValue": "0x0084008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104008000 ", + "MSRValue": "0x0104008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204008000 ", + "MSRValue": "0x0204008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404008000 ", + "MSRValue": "0x0404008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": 
"0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004008000 ", + "MSRValue": "0x1004008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004008000 ", + "MSRValue": "0x2004008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84008000 ", + "MSRValue": "0x3F84008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc008000 ", + "MSRValue": "0x00BC008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c008000 ", + "MSRValue": "0x013C008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts any other requests that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c008000 ", + "MSRValue": "0x023C008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c008000 ", + "MSRValue": "0x043C008000", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020090 ", + "MSRValue": "0x2000020090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0090 ", + "MSRValue": "0x20003C0090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000090 ", + "MSRValue": "0x0084000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000090 ", + "MSRValue": "0x0104000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000090 ", + "MSRValue": "0x0204000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", 
"CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000090 ", + "MSRValue": "0x0404000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000090 ", + "MSRValue": "0x1004000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000090 ", + "MSRValue": "0x2004000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000090 ", + "MSRValue": "0x3F84000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000090 ", + "MSRValue": "0x00BC000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000090 ", + "MSRValue": "0x013C000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000090 ", + "MSRValue": "0x023C000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000090 ", + "MSRValue": "0x043C000090", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020120 ", + 
"MSRValue": "0x2000020120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0120 ", + "MSRValue": "0x20003C0120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000120 ", + "MSRValue": "0x0084000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000120 ", + "MSRValue": "0x0104000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000120 ", + "MSRValue": "0x0204000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000120 ", + "MSRValue": "0x0404000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000120 ", + "MSRValue": "0x1004000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000120 ", + "MSRValue": "0x2004000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000120 ", + "MSRValue": "0x3F84000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000120 ", + "MSRValue": "0x00BC000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000120 ", + "MSRValue": "0x013C000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000120 ", + "MSRValue": "0x023C000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000120 ", + "MSRValue": "0x043C000120", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020240 ", + "MSRValue": "0x2000020240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0240 ", + "MSRValue": "0x20003C0240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000240 ", + "MSRValue": "0x0084000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000240 ", + "MSRValue": "0x0104000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000240 ", + "MSRValue": "0x0204000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", 
"CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000240 ", + "MSRValue": "0x0404000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000240 ", + "MSRValue": "0x1004000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000240 ", + "MSRValue": "0x2004000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000240 ", + "MSRValue": "0x3F84000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000240 ", + "MSRValue": "0x00BC000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000240 ", + "MSRValue": "0x013C000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000240 ", + "MSRValue": "0x023C000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch code reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000240 ", + "MSRValue": "0x043C000240", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all prefetch code reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x2000020091 ", + "MSRValue": "0x2000020091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0091 ", + "MSRValue": "0x20003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000091 ", + "MSRValue": "0x0084000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000091 ", + "MSRValue": "0x0104000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000091 ", + "MSRValue": "0x0204000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", 
"SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000091 ", + "MSRValue": "0x0404000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000091 ", + "MSRValue": "0x1004000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000091 ", + "MSRValue": "0x2004000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000091 ", + "MSRValue": "0x3F84000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000091 ", + "MSRValue": "0x00BC000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000091 ", + "MSRValue": "0x013C000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000091 ", + "MSRValue": "0x023C000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000091 ", + "MSRValue": "0x043C000091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts 
all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020122 ", + "MSRValue": "0x2000020122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20003c0122 ", + "MSRValue": "0x20003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000122 ", + "MSRValue": "0x0084000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000122 ", + "MSRValue": "0x0104000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000122 ", + "MSRValue": "0x0204000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - 
"BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000122 ", + "MSRValue": "0x0404000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000122 ", + "MSRValue": "0x1004000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000122 ", + "MSRValue": "0x2004000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f84000122 ", + "MSRValue": "0x3F84000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000122 ", + "MSRValue": "0x00BC000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000122 ", + "MSRValue": "0x013C000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000122 ", + "MSRValue": "0x023C000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response.", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000122 ", + "MSRValue": "0x043C000122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts all demand & prefetch RFOs", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json index 999cf30663639..bb25574b8d212 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "This event counts the number of instructions 
retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -11,7 +10,6 @@ }, { "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -31,7 +28,6 @@ }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -317,7 +313,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). 
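The dropped "EventCode": "0x00" lines in the fixed-counter entries above are cosmetic: INST_RETIRED.ANY, CPU_CLK_UNHALTED.THREAD and CPU_CLK_UNHALTED.REF_TSC are not programmed through an event-select MSR at all, but enabled through IA32_FIXED_CTR_CTRL and IA32_PERF_GLOBAL_CTRL. As a sketch of what a kernel or hypervisor writes (user space should go through perf instead), assuming the SDM's register layout:

#include <stdint.h>

/* MSR addresses from the Intel SDM (Vol. 3, performance monitoring). */
#define IA32_FIXED_CTR_CTRL	0x38d
#define IA32_PERF_GLOBAL_CTRL	0x38e

/*
 * Each fixed counter gets a 4-bit control field in IA32_FIXED_CTR_CTRL:
 * bit 0 counts ring 0, bit 1 counts ring 3, bit 2 is AnyThread (the
 * "AnyThread": "1" seen on CPU_CLK_UNHALTED.THREAD_ANY), bit 3 raises
 * a PMI on overflow.
 */
static uint64_t fixed_ctrl_field(int ctr, int os, int usr, int any, int pmi)
{
	uint64_t f = (os ? 1 : 0) | (usr ? 2 : 0) | (any ? 4 : 0) | (pmi ? 8 : 0);

	return f << (4 * ctr);
}

/* Enable fixed counters 0..2 (INST_RETIRED.ANY, CPU_CLK_UNHALTED.THREAD,
 * CPU_CLK_UNHALTED.REF_TSC) for user+kernel, no PMI. */
uint64_t fixed_ctrl_value(void)
{
	return fixed_ctrl_field(0, 1, 1, 0, 0) |
	       fixed_ctrl_field(1, 1, 1, 0, 0) |
	       fixed_ctrl_field(2, 1, 1, 0, 0);
}

/* IA32_PERF_GLOBAL_CTRL bits 32+i gate fixed counter i. */
uint64_t global_ctrl_value(void)
{
	return (1ull << 32) | (1ull << 33) | (1ull << 34);
}

A wrmsr() of those two values (plus zeroing the counters) is essentially all the programming these three events need, which is why the JSON no longer carries an event code for them.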
Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", + "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "EventCode": "0x87", "Counter": "0,1,2,3", "UMask": "0x1", @@ -786,8 +782,8 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts resource-related stall cycles.", + "EventCode": "0xa2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "RESOURCE_STALLS.ANY", @@ -973,6 +969,7 @@ "CounterHTOff": "2" }, { + "PublicDescription": "Number of Uops delivered by the LSD.", "EventCode": "0xA8", "Counter": "0,1,2,3", "UMask": "0x1", @@ -1147,7 +1144,8 @@ "CounterHTOff": "1" }, { - "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", "EventCode": "0xC0", "Counter": "0,1,2,3", "UMask": "0x2", @@ -1157,12 +1155,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PEBS": "1", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1178,26 +1176,28 @@ "Data_LA": "1" }, { - "PublicDescription": "This event counts cycles without actually retired uops.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts cycles without actually retired uops.", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles no executable uops retired (Precise Event)", "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "PEBS": "1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to PEBS uops retired event.", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Number of cycles using always true condition applied to PEBS uops retired event.", "CounterMask": "10", "CounterHTOff": "0,1,2,3" }, @@ -1320,13 +1320,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts not taken branch instructions retired.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired. 
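The "PEBS": "1" additions in these hunks mark events that have a precise PEBS flavor, which perf requests via perf_event_attr.precise_ip (the :p/:pp event modifiers). A minimal sampling sketch built from the BR_INST_RETIRED.NOT_TAKEN entry above (EventCode 0xC4, UMask 0x10, SampleAfterValue 400009), assuming a CPU and kernel with PEBS support:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	volatile long sink = 0;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x10C4;		/* BR_INST_RETIRED.NOT_TAKEN */
	attr.sample_period = 400009;	/* SampleAfterValue from the entry above */
	attr.sample_type = PERF_SAMPLE_IP;
	attr.precise_ip = 2;		/* request PEBS: precise retired IP */
	attr.exclude_kernel = 1;
	attr.disabled = 1;

	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open (PEBS may be unavailable)");
		return 1;
	}

	/* 1 metadata page + 8 data pages for the sample ring. */
	size_t len = 9 * sysconf(_SC_PAGESIZE);
	struct perf_event_mmap_page *meta =
		mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (meta == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (volatile long i = 0; i < 100000000; i++)
		if (i & 1)	/* alternating branch: half not-taken */
			sink++;
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	printf("ring head after run: %llu bytes of sample data\n",
	       (unsigned long long)meta->data_head);
	return 0;
}

A real consumer walks the PERF_RECORD_SAMPLE records in the ring; data_head is read here only to show that samples arrived.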
(Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1341,14 +1342,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts far branch instructions retired.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x40", "Errata": "BDW98", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", + "BriefDescription": "Counts the number of far branch instructions retired.(Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { -- GitLab From d2243329ef3c5107d00dee4327a9884a394ef715 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:37:33 -0700 Subject: [PATCH 0435/1861] perf vendor events intel: Update Broadwell-DE events to v7 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/broadwellde/cache.json | 4 ++-- tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json index 4ad425312bdc4..bf243fe2a0ec3 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json @@ -439,7 +439,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -451,7 +451,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json index 0d04bf9db0008..e2f0540625a24 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -10,7 +9,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", "Counter": "Fixed counter 1", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ -30,7 +27,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -322,7 +318,7 @@ "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", "EventName": "ILD_STALL.LCP", - "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", + "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, -- GitLab From 24339348b9153047c08b5879bc69d3327cb07783 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:38:43 -0700 Subject: [PATCH 0436/1861] perf vendor events intel: Update Skylake events to v42 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylake/cache.json | 2193 ++++++++++++++++- .../pmu-events/arch/x86/skylake/frontend.json | 14 +- .../pmu-events/arch/x86/skylake/memory.json | 1121 ++++++++- .../pmu-events/arch/x86/skylake/pipeline.json | 39 +- 4 files changed, 3181 insertions(+), 186 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json index 54bfe9e4045c7..720458139049c 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json @@ -60,10 +60,10 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. 
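For context on how these JSON files are consumed: at build time tools/perf's jevents turns each object into an entry in a generated event table, so the EventName becomes a usable alias and the remaining fields become perf event terms. A hedged sketch of that field-to-term mapping (the struct is illustrative, not perf's real internal type), using the Skylake L2_RQSTS.DEMAND_DATA_RD_HIT entry just below, whose UMask the v42 update widens from 0x41 to 0xc1:

#include <stdio.h>

/* A few fields of one pmu-events JSON object (illustrative subset). */
struct jevent {
	const char *name;	/* "EventName" */
	unsigned event;		/* "EventCode" */
	unsigned umask;		/* "UMask" */
	unsigned long sav;	/* "SampleAfterValue" */
};

/* Render the entry as the term string one could pass to perf stat/record -e. */
static void render(const struct jevent *e)
{
	printf("%s => cpu/event=0x%x,umask=0x%x,period=%lu/\n",
	       e->name, e->event, e->umask, e->sav);
}

int main(void)
{
	struct jevent e = {
		"L2_RQSTS.DEMAND_DATA_RD_HIT", 0x24, 0xc1, 200003
	};

	render(&e);
	return 0;
}

This is also why updates like the umask change here matter beyond documentation: they change what the named alias actually counts.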
Only non rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0xc1", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "Demand Data Read requests that hit L2 cache", @@ -73,7 +73,7 @@ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", + "UMask": "0xc2", "EventName": "L2_RQSTS.RFO_HIT", "SampleAfterValue": "200003", "BriefDescription": "RFO requests that hit L2 cache", @@ -83,7 +83,7 @@ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x44", + "UMask": "0xc4", "EventName": "L2_RQSTS.CODE_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", @@ -482,7 +482,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", @@ -554,7 +554,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", @@ -661,13 +661,13 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "EventCode": "0xF2", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "L2_LINES_OUT.USELESS_PREF", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "BriefDescription": "This event is deprecated. 
Refer to new event L2_LINES_OUT.USELESS_HWPF", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -690,249 +690,2238 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400001 ", + "MSRValue": "0x3FC0408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400001 ", + "MSRValue": "0x1000408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400001 ", + "MSRValue": "0x0400408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400001 ", + "MSRValue": "0x0200408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be 
programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400001 ", + "MSRValue": "0x0100408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400001 ", + "MSRValue": "0x0080408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0001 ", + "MSRValue": "0x0040408000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0001 ", + "MSRValue": "0x3FC01C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0001 ", + "MSRValue": "0x10001C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0001 ", + "MSRValue": "0x04001C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
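Because each offcore flavor is a separate event, ratios such as "what fraction of L3 hits required a HITM snoop" need two of these entries counted at once; with perf_event_open they can be placed in one group so both are scheduled together (the kernel can steer the second onto the alternate 0xBB/0x1a7 offcore MSR). A sketch using the two OTHER.L3_HIT values from the entries just above; open_offcore is a local helper wrapping the boilerplate:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int open_offcore(uint64_t rsp, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01B7;			/* OFFCORE_RESPONSE event/umask */
	attr.config1 = rsp;			/* MSRValue */
	attr.read_format = PERF_FORMAT_GROUP;
	attr.disabled = (group_fd == -1);	/* leader starts disabled */
	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	struct { uint64_t nr, val[2]; } buf;
	int lead = open_offcore(0x3FC01C8000ULL, -1);	/* OTHER.L3_HIT.ANY_SNOOP  */
	int hitm = open_offcore(0x10001C8000ULL, lead);	/* OTHER.L3_HIT.SNOOP_HITM */

	if (lead < 0 || hitm < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(lead, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	/* ... workload under test ... */
	ioctl(lead, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

	if (read(lead, &buf, sizeof(buf)) < 0) {
		perror("read");
		return 1;
	}
	printf("HITM snoops: %llu of %llu L3-hit 'other' requests\n",
	       (unsigned long long)buf.val[1], (unsigned long long)buf.val[0]);
	return 0;
}

Grouping matters here: without it the two events may be multiplexed over different time slices and the ratio becomes an estimate rather than an exact fraction.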
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0001 ", + "MSRValue": "0x02001C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0001 ", + "MSRValue": "0x01001C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020001 ", + "MSRValue": "0x00801C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020001 ", + "MSRValue": "0x00401C8000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020001 ", + "MSRValue": "0x3FC0108000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020001 ", + "MSRValue": "0x1000108000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020001 ", + "MSRValue": "0x0400108000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020001 ", + "MSRValue": "0x0200108000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x0080108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + 
"SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests have any response type.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0000018000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests have any response type.", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + 
"Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC01C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x04001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand 
instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x02001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x01001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00801C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200100004", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": 
"0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction 
cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": 
"Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0000010004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + 
"BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC01C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x04001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x02001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x01001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00801C0002", + "Counter": 
"0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + 
"PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + 
"SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x0200020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0000010002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", 
+ "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC01C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10001C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x04001C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x02001C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x01001C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00801C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + 
}, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00401C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data 
reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + 
"MSRValue": "0x0200040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0080040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC0020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1000020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0400020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0200020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0100020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x0080020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0040020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads have any response type.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010001 ", + "MSRValue": "0x0000010001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that have any response type.", + "BriefDescription": "Counts demand data reads have any response type.", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index 578dff5bd823c..7fa95a35e3cac 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -177,7 +177,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -242,7 +242,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. 
SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", @@ -253,7 +253,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", "EventCode": "0xC6", "MSRValue": "0x11", "Counter": "0,1,2,3", @@ -360,7 +360,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "EventCode": "0xC6", "MSRValue": "0x400806", "Counter": "0,1,2,3", @@ -374,7 +374,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", "EventCode": "0xC6", "MSRValue": "0x401006", "Counter": "0,1,2,3", @@ -388,7 +388,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
During this period the front-end delivered no uops.", "EventCode": "0xC6", "MSRValue": "0x402006", "Counter": "0,1,2,3", @@ -454,7 +454,7 @@ }, { "PEBS": "1", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "EventCode": "0xC6", "MSRValue": "0x100206", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json index 3bd8b712c889d..f197b4c7695be 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json @@ -215,7 +215,7 @@ "UMask": "0x4", "EventName": "HLE_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). ", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -237,6 +237,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x20", @@ -292,7 +293,7 @@ "UMask": "0x4", "EventName": "RTM_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -346,7 +347,7 @@ }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x4", "Counter": "0,1,2,3", @@ -354,13 +355,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. 
Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x8", "Counter": "0,1,2,3", @@ -368,13 +369,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "SampleAfterValue": "50021", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x10", "Counter": "0,1,2,3", @@ -382,13 +383,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "SampleAfterValue": "20011", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x20", "Counter": "0,1,2,3", @@ -396,13 +397,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "SampleAfterValue": "100007", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x40", "Counter": "0,1,2,3", @@ -410,13 +411,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "SampleAfterValue": "2003", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. 
Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x80", "Counter": "0,1,2,3", @@ -424,13 +425,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "SampleAfterValue": "1009", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x100", "Counter": "0,1,2,3", @@ -438,13 +439,13 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "SampleAfterValue": "503", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "2", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", "EventCode": "0xCD", "MSRValue": "0x200", "Counter": "0,1,2,3", @@ -452,163 +453,1151 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "SampleAfterValue": "101", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts any other requests", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000001 ", + "MSRValue": "0x3FFC408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x203C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00BC408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC4008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0404008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": 
"0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0204008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0104008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0084008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044008000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000408000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000108000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000088000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000048000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", 
+ "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts any other requests", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000028000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts any other requests", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FFC400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x203C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + 
"EventCode": "0xB7, 0xBB", + "MSRValue": "0x00BC400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC4000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0404000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0204000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0104000004", + "Counter": 
"0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0084000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044000004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000100004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000080004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000040004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000020004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FFC400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x203C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00BC400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": 
"Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FC4000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2004000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0404000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0204000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0104000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0084000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x0044000002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000400002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000100002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000080002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000040002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts all demand data writes (RFOs)", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000020002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs)", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3FFC400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data 
reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000001 ", + "MSRValue": "0x203C400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000001 ", + "MSRValue": "0x043C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000001 ", + "MSRValue": "0x023C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000001 ", + "MSRValue": "0x013C400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000001 ", + "MSRValue": "0x00BC400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x007C400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000001 ", + "MSRValue": "0x3FC4000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000001 ", + "MSRValue": "0x2004000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1004000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000001 ", + "MSRValue": "0x0404000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes 
and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000001 ", + "MSRValue": "0x0204000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000001 ", + "MSRValue": "0x0104000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000001 ", + "MSRValue": "0x0084000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x0044000001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x20001C0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000100001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + 
"SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000080001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000040001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Counts demand data reads", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2000020001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "Counts demand data reads", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index bc6d2afbcd8ac..4a891fbbc4bb2 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -11,7 +10,6 @@ }, { "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -31,7 +28,6 @@ }, { "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. 
The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -121,7 +117,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", "EventCode": "0x0E", "Counter": "0,1,2,3", "UMask": "0x2", @@ -247,6 +243,16 @@ "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", + "EventCode": "0x59", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", "EventCode": "0x5E", @@ -361,8 +367,8 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. 
*any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", - "EventCode": "0xA2", + "PublicDescription": "Counts resource-related stall cycles.", + "EventCode": "0xa2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "RESOURCE_STALLS.ANY", @@ -735,7 +741,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "This event counts cycles without actually retired uops.", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", @@ -759,6 +765,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Number of machine clears (nukes) of any type.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", @@ -839,14 +846,15 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "SKL091", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -924,7 +932,7 @@ "UMask": "0x20", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -937,6 +945,15 @@ "BriefDescription": "Increments whenever there is an update to the LBR array.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xCC", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. 
This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", "EventCode": "0xE6", -- GitLab From 19f2d40c57141bf586c5b4f6bd5d8fedf7b54f3e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:39:05 -0700 Subject: [PATCH 0437/1861] perf vendor events intel: Update SkylakeX events to v1.12 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylakex/cache.json | 786 ++++++++---------- .../arch/x86/skylakex/floating-point.json | 2 - .../arch/x86/skylakex/frontend.json | 234 +++--- .../pmu-events/arch/x86/skylakex/memory.json | 751 ++++++++--------- .../arch/x86/skylakex/pipeline.json | 173 ++-- 5 files changed, 899 insertions(+), 1047 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 5c9940866acd8..24df183693faa 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -61,17 +61,17 @@ }, { "EventCode": "0x24", - "UMask": "0x41", + "UMask": "0xc1", "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x24", - "UMask": "0x42", + "UMask": "0xc2", "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.RFO_HIT", @@ -81,7 +81,7 @@ }, { "EventCode": "0x24", - "UMask": "0x44", + "UMask": "0xc4", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.CODE_RD_HIT", @@ -165,6 +165,7 @@ "BriefDescription": "Core-originated cacheable demand requests missed L3", "Counter": "0,1,2,3", "EventName": "LONGEST_LAT_CACHE.MISS", + "Errata": "SKL057", "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" @@ -175,28 +176,29 @@ "BriefDescription": "Core-originated cacheable demand requests that refer to L3", "Counter": "0,1,2,3", "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", + "Errata": "SKL057", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. 
It does not include all accesses to the L3.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x48", "UMask": "0x1", - "BriefDescription": "L1D miss outstandings duration in cycles", + "BriefDescription": "Cycles with L1D load Misses outstanding.", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING", - "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x48", "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding.", + "BriefDescription": "L1D miss outstandings duration in cycles", "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -234,21 +236,21 @@ { "EventCode": "0x60", "UMask": "0x1", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). 
A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -307,21 +309,21 @@ { "EventCode": "0x60", "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "CounterMask": "1", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "UMask": "0x8", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). 
See corresponding Umask under OFFCORE_REQUESTS.", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -486,7 +488,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -558,7 +560,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -690,6 +692,7 @@ "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.SILENT", + "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -699,17 +702,18 @@ "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.NON_SILENT", - "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xF2", "UMask": "0x4", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "BriefDescription": "This event is deprecated. 
Refer to new event L2_LINES_OUT.USELESS_HWPF", + "Deprecated": "1", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.USELESS_PREF", - "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -736,12 +740,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that have any response type.", - "MSRValue": "0x0000010001 ", + "BriefDescription": "Counts demand data reads have any response type.", + "MSRValue": "0x0000010001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -749,12 +753,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x01003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -762,25 +766,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0001 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -788,12 +779,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -801,12 +792,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3.", - "MSRValue": "0x3f803c0001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x3F803C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -814,12 +805,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", - "MSRValue": "0x0000010002 ", + "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", + "MSRValue": "0x0000010002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -827,12 +818,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x01003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -840,12 +831,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -853,25 +844,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0002 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -879,12 +857,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", - "MSRValue": "0x3f803c0002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x3F803C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -892,12 +870,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that have any response type.", - "MSRValue": "0x0000010004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", + "MSRValue": "0x0000010004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -905,12 +883,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x01003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -918,12 +896,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -931,25 +909,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0004 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -957,12 +922,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3.", - "MSRValue": "0x3f803c0004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x3F803C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -970,12 +935,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", - "MSRValue": "0x0000010010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", + "MSRValue": "0x0000010010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -983,12 +948,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x01003C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -996,12 +961,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x04003C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1009,25 +974,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0010 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x10003C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1035,12 +987,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", - "MSRValue": "0x3f803c0010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1048,12 +1000,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", - "MSRValue": "0x0000010020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", + "MSRValue": "0x0000010020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1061,12 +1013,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x01003C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1074,12 +1026,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x04003C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1087,25 +1039,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0020 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x10003C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1113,12 +1052,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", - "MSRValue": "0x3f803c0020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1126,12 +1065,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", - "MSRValue": "0x0000010080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", + "MSRValue": "0x0000010080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1139,12 +1078,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x01003C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1152,25 +1091,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x04003C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0080 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1178,12 +1104,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x10003C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1191,12 +1117,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", - "MSRValue": "0x3f803c0080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1204,12 +1130,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", - "MSRValue": "0x0000010100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", + "MSRValue": "0x0000010100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1217,12 +1143,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x01003C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1230,12 +1156,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x04003C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1243,12 +1169,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x10003C0100", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1256,12 +1182,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1269,12 +1195,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", - "MSRValue": "0x3f803c0100 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", + "MSRValue": "0x0000010400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1282,12 +1208,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.", - "MSRValue": "0x0000010400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x01003C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1295,12 +1221,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x04003C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1308,12 +1234,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x10003C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1321,12 +1247,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "PF_L1D_AND_SW & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x3F803C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1334,12 +1260,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0400 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1347,12 +1273,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.", - "MSRValue": "0x3f803c0400 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1360,12 +1286,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that have any response type.", - "MSRValue": "0x0000018000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1373,12 +1299,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1386,12 +1312,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1399,12 +1325,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "OTHER & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c8000 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1412,12 +1338,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1425,12 +1351,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts any other requests that hit in the L3.", - "MSRValue": "0x3f803c8000 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1438,12 +1364,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that have any response type.", - "MSRValue": "0x0000010490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1451,12 +1377,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1464,12 +1390,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0490 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1477,12 +1403,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1490,12 +1416,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1503,12 +1429,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that hit in the L3.", - "MSRValue": "0x3f803c0490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1516,12 +1442,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that have any response type.", - "MSRValue": "0x0000010120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1529,12 +1455,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0120 ", + "BriefDescription": "TBD have any response type.", + "MSRValue": "0x0000010122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD have any response type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1542,12 +1468,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x01003C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1555,12 +1481,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1568,12 +1494,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1581,12 +1507,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that hit in the L3.", - "MSRValue": "0x3f803c0120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3F803C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1594,12 +1520,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that have any response type.", - "MSRValue": "0x0000010491 ", + "BriefDescription": "Counts demand data reads", + "MSRValue": "0x08007C0001", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts demand data reads", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1607,12 +1532,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0491 ", + "BriefDescription": "Counts all demand data writes (RFOs)", + "MSRValue": "0x08007C0002", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all demand data writes (RFOs)", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1620,12 +1544,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0491 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", + "MSRValue": "0x08007C0004", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1633,12 +1556,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0491 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads", + "MSRValue": "0x08007C0010", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1646,12 +1568,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0491 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", + "MSRValue": "0x08007C0020", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1659,12 +1580,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", - "MSRValue": "0x3f803c0491 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", + "MSRValue": "0x08007C0080", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1672,12 +1592,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", - "MSRValue": "0x0000010122 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", + "MSRValue": "0x08007C0100", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1685,12 +1604,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "MSRValue": "0x01003c0122 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", + "MSRValue": "0x08007C0400", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1698,12 +1616,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x04003c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0490", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1711,12 +1628,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", - "MSRValue": "0x08003c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0120", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1724,12 +1640,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", - "MSRValue": "0x10003c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0491", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1737,12 +1652,11 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", - "MSRValue": "0x3f803c0122 ", + "BriefDescription": "TBD", + "MSRValue": "0x08007C0122", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "PublicDescription": "TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index 286ed1a37ec9a..c5d0babe89fce 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -59,7 +59,6 @@ "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", - "PublicDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -69,7 +68,6 @@ "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", - "PublicDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 403a4f89e9b27..4dc583cfb5459 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -1,14 +1,4 @@ [ - { - "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. 
This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x79", "UMask": "0x4", @@ -22,11 +12,11 @@ }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "UMask": "0x4", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -41,6 +31,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x79", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "Counter": "0,1,2,3", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x79", "UMask": "0x10", @@ -55,22 +55,22 @@ { "EventCode": "0x79", "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "CounterMask": "1", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Count includes uops that may 'bypass' the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -87,22 +87,22 @@ { "EventCode": "0x79", "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering 4 Uops", + "BriefDescription": "Cycles MITE is delivering any Uop", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "CounterMask": "1", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering any Uop", + "BriefDescription": "Cycles MITE is delivering 4 Uops", "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -118,24 +118,24 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", "EventCode": "0x79", "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "EdgeDetect": "1", "EventCode": "0x79", "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. 
Some instructions such as transcendentals may additionally generate uops from the MS.", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -177,67 +177,67 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "CounterMask": "4", - "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "CounterMask": "1", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", - "CounterMask": "3", - "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). 
IDQ_Uops_Not_Delivered.core >= 3.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "CounterMask": "2", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", - "CounterMask": "2", - "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "CounterMask": "3", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", - "CounterMask": "1", - "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "CounterMask": "4", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0x9C", "UMask": "0x1", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", - "CounterMask": "1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -247,20 +247,19 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. 
SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x11", + "MSRValue": "0x400406", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.DSB_MISS", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -268,11 +267,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x12", + "MSRValue": "0x200206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L1I_MISS", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -281,11 +280,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", - "MSRValue": "0x13", + "MSRValue": "0x400206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L2_MISS", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -294,13 +293,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x14", + "MSRValue": "0x15", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -308,13 +307,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x15", + "MSRValue": "0x14", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.STLB_MISS", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -322,11 +321,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x400206", + "MSRValue": "0x13", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", + "EventName": "FRONTEND_RETIRED.L2_MISS", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -335,11 +334,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", "PEBS": "1", - "MSRValue": "0x200206", + "MSRValue": "0x12", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -348,12 +347,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. 
Precise Event.", "PEBS": "1", - "MSRValue": "0x400406", + "MSRValue": "0x11", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -361,13 +361,12 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x400806", + "MSRValue": "0x300206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -375,13 +374,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x401006", + "MSRValue": "0x100206", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -389,13 +388,12 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x402006", + "MSRValue": "0x420006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
During this period the front-end delivered no uops.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -403,11 +401,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x404006", + "MSRValue": "0x410006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -429,11 +427,11 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x410006", + "MSRValue": "0x404006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", "MSRIndex": "0x3F7", "TakenAlone": "1", "SampleAfterValue": "100007", @@ -442,12 +440,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x420006", + "MSRValue": "0x402006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -455,13 +454,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", - "MSRValue": "0x100206", + "MSRValue": "0x401006", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. 
A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -469,12 +468,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", "PEBS": "1", - "MSRValue": "0x300206", + "MSRValue": "0x400806", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index e7f1aa31226dc..48a9cdf81307c 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -129,20 +129,20 @@ { "EventCode": "0x60", "UMask": "0x10", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "CounterMask": "6", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "UMask": "0x10", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "CounterMask": "6", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -210,7 +210,7 @@ { "EventCode": "0xC8", "UMask": "0x4", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). 
", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", @@ -242,6 +242,7 @@ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -287,7 +288,7 @@ { "EventCode": "0xC9", "UMask": "0x4", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", @@ -347,125 +348,125 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "PEBS": "2", - "MSRValue": "0x4", + "MSRValue": "0x200", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "100003", + "SampleAfterValue": "101", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "PEBS": "2", - "MSRValue": "0x8", + "MSRValue": "0x100", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. 
Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "50021", + "SampleAfterValue": "503", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "PEBS": "2", - "MSRValue": "0x10", + "MSRValue": "0x80", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "20011", + "SampleAfterValue": "1009", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "PEBS": "2", - "MSRValue": "0x20", + "MSRValue": "0x40", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "100007", + "SampleAfterValue": "2003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "PEBS": "2", - "MSRValue": "0x40", + "MSRValue": "0x20", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. 
Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "2003", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "PEBS": "2", - "MSRValue": "0x80", + "MSRValue": "0x10", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "1009", + "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "PEBS": "2", - "MSRValue": "0x100", + "MSRValue": "0x8", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "503", + "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "PEBS": "2", - "MSRValue": "0x200", + "MSRValue": "0x4", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", - "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. 
Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "101", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss in the L3.", - "MSRValue": "0x3fbc000001 ", + "BriefDescription": "Counts demand data reads TBD TBD", + "MSRValue": "0x3FBC000001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -473,12 +474,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x083FC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -486,12 +487,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x103FC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -499,12 +500,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x063FC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -512,12 +513,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x063B800001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -525,12 +526,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000001 ", + "BriefDescription": "Counts demand data reads TBD", + "MSRValue": "0x0604000001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -538,12 +539,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.", - "MSRValue": "0x3fbc000002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRValue": "0x3FBC000002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -551,12 +552,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x083FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -564,12 +565,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x103FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -577,12 +578,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x063FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -590,12 +591,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x063B800002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -603,12 +604,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000002 ", + "BriefDescription": "Counts all demand data writes (RFOs) TBD", + "MSRValue": "0x0604000002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -616,12 +617,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss in the L3.", - "MSRValue": "0x3fbc000004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", + "MSRValue": "0x3FBC000004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -629,12 +630,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x083FC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -642,12 +643,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x103FC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -655,12 +656,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x063FC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -668,12 +669,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x063B800004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -681,12 +682,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000004 ", + "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", + "MSRValue": "0x0604000004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -694,12 +695,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.", - "MSRValue": "0x3fbc000010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", + "MSRValue": "0x3FBC000010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -707,12 +708,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x083FC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -720,12 +721,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x103FC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -733,12 +734,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x063FC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -746,12 +747,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x063B800010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -759,12 +760,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000010 ", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", + "MSRValue": "0x0604000010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -772,12 +773,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.", - "MSRValue": "0x3fbc000020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", + "MSRValue": "0x3FBC000020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -785,12 +786,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x083FC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -798,12 +799,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x103FC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -811,12 +812,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x063FC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -824,12 +825,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x063B800020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -837,12 +838,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000020 ", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", + "MSRValue": "0x0604000020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -850,12 +851,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.", - "MSRValue": "0x3fbc000080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", + "MSRValue": "0x3FBC000080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -863,12 +864,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x083FC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -876,12 +877,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x103FC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -889,12 +890,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x063FC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -902,12 +903,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x063B800080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -915,12 +916,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000080 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", + "MSRValue": "0x0604000080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -928,12 +929,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.", - "MSRValue": "0x3fbc000100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRValue": "0x3FBC000100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. 
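Most of the churn in these hunks is mechanical: the regenerated file drops the stray trailing space from every MSRValue and upper-cases the hex digits ("0x063b800080 " becomes "0x063B800080"). Tooling that diffs event files across vendor releases reads much better if it normalizes first; a small sketch (the helper is ours, not part of the perf tree):

def normalize_msr_value(raw):
    # Trim whitespace and upper-case the hex digits while keeping
    # the conventional lowercase "0x" prefix.
    s = raw.strip()
    if s.lower().startswith("0x"):
        return "0x" + s[2:].upper()
    return s

assert normalize_msr_value("0x063b800080 ") == "0x063B800080"
assert normalize_msr_value("0x3fbc000080") == "0x3FBC000080"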
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -941,12 +942,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x083FC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -954,12 +955,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x103FC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -967,12 +968,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x063FC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -980,12 +981,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x063B800100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -993,12 +994,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000100 ", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", + "MSRValue": "0x0604000100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1006,12 +1007,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.", - "MSRValue": "0x3fbc000400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRValue": "0x3FBC000400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. 
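The MSRValues within one event family differ only above bit 15. Per the SDM's offcore-response layout, the low 16 bits select the request type and the upper bits select supplier and snoop response, which is why every PF_L3_RFO entry above keeps request bit 0x100 while the prefix varies with the .ANY_SNOOP/.REMOTE_HITM/... suffix. A quick arithmetic check on the values quoted in this hunk:

# MSRValues copied from the PF_L3_RFO entries above.
PF_L3_RFO = {
    "L3_MISS.ANY_SNOOP":                        0x3FBC000100,
    "L3_MISS.REMOTE_HIT_FORWARD":               0x083FC00100,
    "L3_MISS.REMOTE_HITM":                      0x103FC00100,
    "L3_MISS.SNOOP_MISS_OR_NO_FWD":             0x063FC00100,
    "L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD": 0x063B800100,
    "L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD":  0x0604000100,
}

# The request-type field (low 16 bits) is identical across the family.
assert {v & 0xFFFF for v in PF_L3_RFO.values()} == {0x100}

for name, v in sorted(PF_L3_RFO.items()):
    print("%-43s response bits %#012x" % (name, v & ~0xFFFF))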
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1019,12 +1020,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x083FC00400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1032,12 +1033,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x103FC00400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1045,12 +1046,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x063FC00400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1058,12 +1059,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x063B800400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1071,90 +1072,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000400 ", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", + "MSRValue": "0x0604000400", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss in the L3.", - "MSRValue": "0x3fbc008000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss in the L3. 
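The supplier masks also nest the way the descriptions suggest: the local-DRAM and remote-DRAM values are subsets of the combined local-or-remote value, though the combined mask carries one extra supplier bit, so it is a strict superset rather than the exact union of the two. Checking with the PF_L1D_AND_SW values above:

ANY_DRAM    = 0x063FC00400  # ...L3_MISS.SNOOP_MISS_OR_NO_FWD
REMOTE_DRAM = 0x063B800400  # ...L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD
LOCAL_DRAM  = 0x0604000400  # ...L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD

assert REMOTE_DRAM & ~ANY_DRAM == 0   # remote is a subset
assert LOCAL_DRAM  & ~ANY_DRAM == 0   # local is a subset
extra = ANY_DRAM & ~(REMOTE_DRAM | LOCAL_DRAM)
print("bit set only in the combined mask: %#x" % extra)  # 0x400000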
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc08000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc08000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc08000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b808000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604008000 ", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1162,12 +1085,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss in the L3.", - "MSRValue": "0x3fbc000490 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1175,12 +1098,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1188,12 +1111,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. 
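Worth noting in the hunk above: the whole OFFCORE_RESPONSE.OTHER.L3_MISS* block is removed outright rather than rewritten. After a bulk regeneration like this one it is easy to sanity-check the result mechanically, since each file is a flat JSON array; a sketch, assuming a kernel checkout at the path shown and that every offcore entry carries the 0x1a6/0x1a7 MSRIndex pair (true of all entries in this hunk):

import json

def audit_offcore(path):
    # EventNames must stay unique and every Offcore entry must still
    # name the MSR pair after the bulk edit.
    events = json.load(open(path))
    seen = set()
    for ev in events:
        if ev.get("Offcore") != "1":
            continue
        name = ev["EventName"]
        assert name not in seen, "duplicate event: " + name
        seen.add(name)
        assert "0x1a6" in ev["MSRIndex"].lower(), name
    return len(seen)

print(audit_offcore(
    "tools/perf/pmu-events/arch/x86/skylakex/cache.json"))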
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1201,12 +1124,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1214,12 +1137,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1227,12 +1150,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000490 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000490", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1240,12 +1163,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss in the L3.", - "MSRValue": "0x3fbc000120 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1253,12 +1176,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1266,12 +1189,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1279,12 +1202,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1292,12 +1215,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1305,12 +1228,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000120 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000120", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1318,12 +1241,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.", - "MSRValue": "0x3fbc000491 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1331,12 +1254,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1344,12 +1267,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. 
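The low request-type bits compose by OR, which the ALL_* families above make visible: the ALL_PF_RFO value ends in 0x120, the union of the PF_L2_RFO bit (0x020, earlier in this hunk) and the PF_L3_RFO bit (0x100). Likewise ALL_PF_DATA_RD's 0x490 unions the three prefetch-read bits; the 0x010 for PF_L2_DATA_RD sits earlier in the file, outside this excerpt, so treat that value as an assumption here:

PF_L2_RFO      = 0x020
PF_L3_RFO      = 0x100
ALL_PF_RFO     = 0x120
assert ALL_PF_RFO == PF_L2_RFO | PF_L3_RFO

PF_L2_DATA_RD  = 0x010  # assumed; defined earlier in cache.json
PF_L3_DATA_RD  = 0x080
PF_L1D_AND_SW  = 0x400
ALL_PF_DATA_RD = 0x490
assert ALL_PF_DATA_RD == PF_L2_DATA_RD | PF_L3_DATA_RD | PF_L1D_AND_SW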
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1357,12 +1280,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1370,12 +1293,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1383,12 +1306,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000491 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000491", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1396,12 +1319,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.", - "MSRValue": "0x3fbc000122 ", + "BriefDescription": "TBD TBD TBD", + "MSRValue": "0x3FBC000122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1409,12 +1332,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", - "MSRValue": "0x083fc00122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x083FC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1422,12 +1345,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", - "MSRValue": "0x103fc00122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x103FC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1435,12 +1358,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", - "MSRValue": "0x063fc00122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063FC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. 
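One regression to be aware of: the regenerated descriptions in these hunks collapse to "TBD" placeholders, so anything that renders BriefDescription to users gets noticeably worse with this update. A consumer can fall back to a string derived from the EventName when it meets a placeholder; a sketch, with the fallback wording entirely ours:

def describe(event):
    brief = event.get("BriefDescription", "")
    if brief and "TBD" not in brief:
        return brief
    # Derive something readable from the dotted event name instead.
    unit, _, rest = event["EventName"].partition(".")
    return "%s: %s" % (unit.replace("_", " ").lower(),
                       rest.replace(".", " / ").replace("_", " ").lower())

ev = {"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
      "BriefDescription": "TBD TBD TBD"}
print(describe(ev))  # offcore response: all rfo / l3 miss / any snoop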
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1448,12 +1371,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.", - "MSRValue": "0x063b800122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x063B800122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1461,12 +1384,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", - "MSRValue": "0x0604000122 ", + "BriefDescription": "TBD TBD", + "MSRValue": "0x0604000122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "MSRIndex": "0x1a6, 0x1a7", + "PublicDescription": "TBD TBD", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index f99f7ae27820c..369f56c1d1b5a 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -10,7 +9,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", "Counter": "Fixed counter 1", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ -30,7 +27,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -99,24 +95,24 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0x0E", "UMask": "0x1", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.ANY", - "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0x0E", "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -126,7 +122,7 @@ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", "Counter": "0,1,2,3", "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. 
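Among other things, the pipeline.json hunk above makes two kinds of changes besides reordering entries: the fixed-counter events (INST_RETIRED.ANY, CPU_CLK_UNHALTED.*) lose their dummy "EventCode": "0x00", which makes sense because fixed counters are not programmed through an event-select MSR, and several events pair CounterMask with Invert to turn a uop count into a stall-cycle count. Both map directly onto perf's raw syntax; cmask, inv and name-based fixed-counter events are genuine perf interfaces, while the translation helper below is ours:

def to_perf_spec(event):
    # Fixed counters have no EventCode; perf addresses them by name.
    if "EventCode" not in event:
        return event["EventName"].lower()
    # EventCode may list alternatives ("0xB7, 0xBB"); take the first.
    code = int(event["EventCode"].split(",")[0], 16)
    terms = ["event=%#x" % code, "umask=%s" % event["UMask"]]
    if "CounterMask" in event:
        terms.append("cmask=%s" % event["CounterMask"])
    if event.get("Invert") == "1":
        terms.append("inv=1")
    return "cpu/%s/" % ",".join(terms)

stall = {"EventCode": "0x0E", "UMask": "0x1", "CounterMask": "1",
         "Invert": "1", "EventName": "UOPS_ISSUED.STALL_CYCLES"}
print(to_perf_spec(stall))  # cpu/event=0xe,umask=0x1,cmask=1,inv=1/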
Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -203,19 +199,19 @@ { "EventCode": "0x3C", "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", "SampleAfterValue": "2503", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x3C", "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", "SampleAfterValue": "2503", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -248,12 +244,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", + "EventCode": "0x59", "UMask": "0x1", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_CYCLES", - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -270,6 +266,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -361,12 +367,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xa2", "UMask": "0x1", "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "Counts resource-related stall cycles. 
Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "PublicDescription": "Counts resource-related stall cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -519,6 +525,17 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xA8", "UMask": "0x1", @@ -531,35 +548,35 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", "CounterMask": "4", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "CounterMask": "3", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "CounterMask": "2", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -575,35 +592,24 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "CounterMask": "2", - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + 
"BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "CounterMask": "3", - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "CounterMask": "4", - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -618,11 +624,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" @@ -630,10 +637,10 @@ { "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -650,20 +657,19 @@ { "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "CounterMask": "2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" @@ -725,12 +731,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "Invert": "1", "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "Counts the retirement slots used.", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition 
(uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -742,19 +750,17 @@ "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.STALL_CYCLES", "CounterMask": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Retirement slots used.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -766,6 +772,7 @@ "Counter": "0,1,2,3", "EventName": "MACHINE_CLEARS.COUNT", "CounterMask": "1", + "PublicDescription": "Number of machine clears (nukes) of any type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -841,11 +848,12 @@ { "EventCode": "0xC4", "UMask": "0x10", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired.", + "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -919,7 +927,7 @@ { "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", @@ -937,6 +945,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xCC", + "UMask": "0x40", + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). 
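Two counter-mask idioms recur in the hunks above: CounterMask=N alone means "cycles in which at least N uops counted" (the UOPS_EXECUTED.CORE_CYCLES_GE_1..4 family), and CounterMask plus Invert flips that to "cycles with fewer than N" (UOPS_RETIRED.TOTAL_CYCLES is cmask=10 inverted). Differencing the GE_N family yields a per-cycle execution-width histogram; a sketch with made-up counts standing in for real perf stat output:

# Stand-in values: total unhalted core cycles plus the
# UOPS_EXECUTED.CORE_CYCLES_GE_N events defined above.
cycles = 1_000_000
ge = {1: 700_000, 2: 450_000, 3: 220_000, 4: 90_000}

hist = {0: cycles - ge[1]}
for n in (1, 2, 3):
    hist[n] = ge[n] - ge[n + 1]   # exactly n uops: (>= n) - (>= n+1)
hist["4+"] = ge[4]

for k in (0, 1, 2, 3, "4+"):
    print("cycles executing %s uops: %d" % (k, hist[k]))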
This event is not supported on first SKL and KBL products.", + "Counter": "0,1,2,3", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xE6", "UMask": "0x1", -- GitLab From 9f0f4a242c24221ed006f449c3f67b863b12985c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:39:28 -0700 Subject: [PATCH 0438/1861] perf vendor events intel: Update BroadwellX events to v14 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwellx/cache.json | 161 +++++++++--------- .../arch/x86/broadwellx/floating-point.json | 16 +- .../arch/x86/broadwellx/memory.json | 148 ++++++++-------- .../arch/x86/broadwellx/pipeline.json | 50 +++--- 4 files changed, 186 insertions(+), 189 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json index 141b1080429d3..75a3098d5775e 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json @@ -57,17 +57,17 @@ }, { "EventCode": "0x24", - "UMask": "0x41", + "UMask": "0xc1", "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache.", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x24", - "UMask": "0x42", + "UMask": "0xc2", "BriefDescription": "RFO requests that hit L2 cache.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.RFO_HIT", @@ -76,7 +76,7 @@ }, { "EventCode": "0x24", - "UMask": "0x44", + "UMask": "0xc4", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.CODE_RD_HIT", @@ -85,7 +85,7 @@ }, { "EventCode": "0x24", - "UMask": "0x50", + "UMask": "0xd0", "BriefDescription": "L2 prefetch requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.L2_PF_HIT", @@ -396,24 +396,24 @@ { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This event counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB. 
(Precise Event - PEBS)", + "BriefDescription": "Retired store uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This event counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -421,37 +421,37 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops with locked access.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "Errata": "BDM35", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This event counts load uops with locked access retired to the architected path.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", + "BriefDescription": "Retired load uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", + "BriefDescription": "Retired store uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -459,24 +459,24 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops. 
(Precise Event - PEBS)", + "BriefDescription": "All retired load uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", + "BriefDescription": "All retired store uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -484,69 +484,69 @@ { "EventCode": "0xD1", "UMask": "0x1", - "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops with L1 cache hits as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This event counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x2", - "BriefDescription": "Retired load uops with L2 cache hits as data sources. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops with L2 cache hits as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "Errata": "BDM35", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This event counts retired load uops which data sources were hits in the mid-level (L2) cache.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This event counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x8", - "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This event counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", + "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This event counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x20", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -558,83 +558,84 @@ { "EventCode": "0xD1", "UMask": "0x40", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This event counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x1", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This event counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This event counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
(Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This event counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x8", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "Errata": "BDM100", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This event counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x1", + "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "BDE70, BDM100", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. 
This is a precise event.", + "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -646,7 +647,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -658,7 +659,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -810,12 +811,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that hit in the L3", - "MSRValue": "0x3f803c8fff", + "BriefDescription": "Counts all requests hit in the L3", + "MSRValue": "0x3F803C8FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -823,12 +824,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -836,12 +837,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c07f7", + "BriefDescription": "Counts 
all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -849,12 +850,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0244", + "BriefDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -862,12 +863,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -875,12 +876,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling 
cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -888,12 +889,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -901,12 +902,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -914,12 +915,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - 
"BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3", - "MSRValue": "0x3f803c0200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -927,12 +928,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3", - "MSRValue": "0x3f803c0100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -940,12 +941,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -953,12 +954,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3", - "MSRValue": "0x3f803c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3", + "MSRValue": "0x3F803C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 Offcore response can be programmed only with a specific 
pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json index d7b9d9c9c5188..ba0e0c4e74eb2 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json @@ -42,7 +42,7 @@ { "EventCode": "0xC7", "UMask": "0x3", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single precision?)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR", "SampleAfterValue": "2000003", @@ -51,7 +51,7 @@ { "EventCode": "0xC7", "UMask": "0x4", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -60,7 +60,7 @@ { "EventCode": "0xC7", "UMask": "0x8", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -69,7 +69,7 @@ { "EventCode": "0xC7", "UMask": "0x10", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. 
Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -78,7 +78,7 @@ { "EventCode": "0xC7", "UMask": "0x15", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", "SampleAfterValue": "2000006", @@ -87,7 +87,7 @@ { "EventCode": "0xc7", "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -96,7 +96,7 @@ { "EventCode": "0xC7", "UMask": "0x2a", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SINGLE", "SampleAfterValue": "2000005", @@ -105,7 +105,7 @@ { "EventCode": "0xC7", "UMask": "0x3c", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. (RSQRT for single-precision?)", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.PACKED", "SampleAfterValue": "2000004", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json index d79a5cfea44bc..ecb413bb67caf 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json @@ -170,11 +170,11 @@ { "EventCode": "0xc8", "UMask": "0x4", - "BriefDescription": "Number of times HLE abort was triggered (PEBS)", + "BriefDescription": "Number of times HLE abort was triggered", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered (PEBS).", + "PublicDescription": "Number of times HLE abort was triggered.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -251,11 +251,11 @@ { "EventCode": "0xc9", "UMask": "0x4", - "BriefDescription": "Number of times RTM abort was triggered (PEBS)", + "BriefDescription": "Number of times RTM abort was triggered", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered (PEBS).", + "PublicDescription": "Number of times RTM abort was triggered .", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -312,14 +312,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 4", + "BriefDescription": "Randomly selected loads with latency value being above 4", "PEBS": "2", "MSRValue": "0x4", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above four.", + "PublicDescription": "Counts randomly selected loads with latency value being above four.", "TakenAlone": "1", "SampleAfterValue": "100003", "CounterHTOff": "3" @@ -327,14 +327,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 8", + "BriefDescription": "Randomly selected loads with latency value being above 8", "PEBS": "2", "MSRValue": "0x8", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above eight.", + "PublicDescription": "Counts randomly selected loads with latency value being above eight.", "TakenAlone": "1", "SampleAfterValue": "50021", "CounterHTOff": "3" @@ -342,14 +342,14 @@ { "EventCode": "0xCD", 
"UMask": "0x1", - "BriefDescription": "Loads with latency value being above 16", + "BriefDescription": "Randomly selected loads with latency value being above 16", "PEBS": "2", "MSRValue": "0x10", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 16.", + "PublicDescription": "Counts randomly selected loads with latency value being above 16.", "TakenAlone": "1", "SampleAfterValue": "20011", "CounterHTOff": "3" @@ -357,14 +357,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 32", + "BriefDescription": "Randomly selected loads with latency value being above 32", "PEBS": "2", "MSRValue": "0x20", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 32.", + "PublicDescription": "Counts randomly selected loads with latency value being above 32.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "3" @@ -372,14 +372,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 64", + "BriefDescription": "Randomly selected loads with latency value being above 64", "PEBS": "2", "MSRValue": "0x40", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 64.", + "PublicDescription": "Counts randomly selected loads with latency value being above 64.", "TakenAlone": "1", "SampleAfterValue": "2003", "CounterHTOff": "3" @@ -387,14 +387,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 128", + "BriefDescription": "Randomly selected loads with latency value being above 128", "PEBS": "2", "MSRValue": "0x80", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 128.", + "PublicDescription": "Counts randomly selected loads with latency value being above 128.", "TakenAlone": "1", "SampleAfterValue": "1009", "CounterHTOff": "3" @@ -402,14 +402,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 256", + "BriefDescription": "Randomly selected loads with latency value being above 256", "PEBS": "2", "MSRValue": "0x100", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 256.", + "PublicDescription": "Counts randomly selected loads with latency value being above 256.", "TakenAlone": "1", "SampleAfterValue": "503", "CounterHTOff": "3" @@ -417,14 +417,14 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 512", + "BriefDescription": "Randomly selected loads with latency value being above 512", "PEBS": "2", "MSRValue": "0x200", "Counter": "3", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "Errata": "BDM100, BDM35", - "PublicDescription": "This event counts loads with latency value being above 512.", + "PublicDescription": "Counts randomly selected loads with latency value being above 512.", "TakenAlone": "1", 
"SampleAfterValue": "101", "CounterHTOff": "3" @@ -433,12 +433,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that miss in the L3", - "MSRValue": "0x3fbfc08fff", + "BriefDescription": "Counts all requests miss in the L3", + "MSRValue": "0x3FBFC08FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -446,12 +446,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x087fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x087FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -459,12 +459,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -472,12 +472,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063bc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063BC007F7", "Counter": "0,1,2,3", "EventName": 
"OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -485,12 +485,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram", - "MSRValue": "0x06040007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", + "MSRValue": "0x06040007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -498,12 +498,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3", - "MSRValue": "0x3fbfc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", + "MSRValue": "0x3FBFC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -511,12 +511,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0604000244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "SampleAfterValue": 
"100003", "CounterHTOff": "0,1,2,3" }, @@ -524,12 +524,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3", - "MSRValue": "0x3fbfc00244", + "BriefDescription": "Counts all demand & prefetch code reads miss in the L3", + "MSRValue": "0x3FBFC00244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -537,12 +537,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "MSRValue": "0x0604000122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -550,12 +550,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3", - "MSRValue": "0x3fbfc00122", + "BriefDescription": "Counts all demand & prefetch RFOs miss in the L3", + "MSRValue": "0x3FBFC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -563,12 +563,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x087fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x087FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -576,12 +576,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -589,12 +589,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063bc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063BC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -602,12 +602,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0604000091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -615,12 +615,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3", - "MSRValue": "0x3fbfc00091", + 
"BriefDescription": "Counts all demand & prefetch data reads miss in the L3", + "MSRValue": "0x3FBFC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -628,12 +628,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3", - "MSRValue": "0x3fbfc00200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", + "MSRValue": "0x3FBFC00200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -641,12 +641,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3", - "MSRValue": "0x3fbfc00100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", + "MSRValue": "0x3FBFC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -654,12 +654,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", 
"SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -667,12 +667,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", - "MSRValue": "0x3fbfc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss in the L3", + "MSRValue": "0x3FBFC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json index 0d04bf9db0008..c2f6932a58173 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -10,7 +9,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", "Counter": "Fixed counter 1", @@ -20,7 +18,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ -30,7 +27,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -322,7 +318,7 @@ "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "Counter": "0,1,2,3", "EventName": "ILD_STALL.LCP", - "PublicDescription": "This event counts stalls occurred due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", + "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -786,12 +782,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xa2", "UMask": "0x1", "BriefDescription": "Resource-related stall cycles", "Counter": "0,1,2,3", "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "PublicDescription": "This event counts resource-related stall cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1168,12 +1164,12 @@ { "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "BriefDescription": "Actually retired uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1204,11 +1200,11 @@ { "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Retirement slots used. (Precise Event - PEBS)", + "BriefDescription": "Retirement slots used.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", + "PublicDescription": "This event counts the number of retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1266,33 +1262,33 @@ { "EventCode": "0xC4", "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Conditional branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "PublicDescription": "This event counts conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Direct and indirect near call instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "PublicDescription": "This event counts both direct and indirect near call instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). 
(Precise Event - PEBS)", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "PublicDescription": "This event counts both direct and indirect macro near call instructions retired (captured in ring 3).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1311,11 +1307,11 @@ { "EventCode": "0xC4", "UMask": "0x8", - "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Return instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "PublicDescription": "This event counts return instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1332,11 +1328,11 @@ { "EventCode": "0xC4", "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Taken branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "PublicDescription": "This event counts taken branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1364,11 +1360,11 @@ { "EventCode": "0xC5", "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "PublicDescription": "This event counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1386,22 +1382,22 @@ { "EventCode": "0xC5", "UMask": "0x8", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.RET", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "PublicDescription": "This event counts mispredicted return instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. 
(Precise Event - PEBS).", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, -- GitLab From e313477f7e76f23c3e2d45428f9456ba675bc702 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:39:49 -0700 Subject: [PATCH 0439/1861] perf vendor events intel: Update HaswellX events to v20 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/haswellx/cache.json | 173 +++++++++--------- .../pmu-events/arch/x86/haswellx/memory.json | 172 ++++++++--------- .../arch/x86/haswellx/pipeline.json | 10 +- 3 files changed, 177 insertions(+), 178 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/haswellx/cache.json b/tools/perf/pmu-events/arch/x86/haswellx/cache.json index b2fbd617306ac..a9e62d4357af0 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/cache.json @@ -64,18 +64,18 @@ }, { "EventCode": "0x24", - "UMask": "0x41", + "UMask": "0xc1", "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "Errata": "HSD78", - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x24", - "UMask": "0x42", + "UMask": "0xc2", "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.RFO_HIT", @@ -85,7 +85,7 @@ }, { "EventCode": "0x24", - "UMask": "0x44", + "UMask": "0xc4", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.CODE_RD_HIT", @@ -95,7 +95,7 @@ }, { "EventCode": "0x24", - "UMask": "0x50", + "UMask": "0xd0", "BriefDescription": "L2 prefetch requests that hit L2 cache", "Counter": "0,1,2,3", "EventName": "L2_RQSTS.L2_PF_HIT", @@ -416,7 +416,7 @@ { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB. (precise Event)", + "BriefDescription": "Retired load uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -428,7 +428,7 @@ { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB. (precise Event)", + "BriefDescription": "Retired store uops that miss the STLB.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -441,7 +441,7 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access. (precise Event)", + "BriefDescription": "Retired load uops with locked access.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -453,34 +453,32 @@ { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)", + "BriefDescription": "Retired load uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "Errata": "HSD29, HSM30", - "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). 
This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)", + "BriefDescription": "Retired store uops that split across a cacheline boundary.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "Errata": "HSD29, HSM30", "L1_Hit_Indication": "1", - "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops. (precise Event)", + "BriefDescription": "All retired load uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -492,14 +490,13 @@ { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops. (precise Event)", + "BriefDescription": "All retired store uops.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "Errata": "HSD29, HSM30", "L1_Hit_Indication": "1", - "PublicDescription": "This event counts all store uops retired. This is a precise event.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -530,13 +527,13 @@ { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.", + "PublicDescription": "Retired load uops with L3 cache hits as data sources.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -549,19 +546,20 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "Errata": "HSM30", - "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.", + "PublicDescription": "Retired load uops missed L1 cache as data sources.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Retired load uops with L2 cache misses as data sources.", + "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "Errata": "HSD29, HSM30", + "PublicDescription": "Retired load uops missed L2. Unknown data source excluded.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -574,6 +572,7 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", + "PublicDescription": "Retired load uops missed L3. Excludes unknown data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -604,26 +603,24 @@ { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. 
", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, @@ -642,19 +639,20 @@ { "EventCode": "0xD3", "UMask": "0x1", + "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "HSD74, HSD29, HSD25, HSM30", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. 
This does not include hardware prefetches.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -666,7 +664,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -678,7 +676,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -833,7 +831,6 @@ "BriefDescription": "Split locks in SQ", "Counter": "0,1,2,3", "EventName": "SQ_MISC.SPLIT_LOCK", - "PublicDescription": "", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -841,12 +838,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0001", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -854,12 +851,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0001", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is 
forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -867,12 +864,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -880,12 +877,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0002", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -893,12 +890,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0004", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoops to 
sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -906,12 +903,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0004", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -919,12 +916,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3", - "MSRValue": "0x3f803c0010", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -932,12 +929,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3", - "MSRValue": "0x3f803c0020", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -945,12 +942,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3", - "MSRValue": "0x3f803c0040", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", + "MSRValue": "0x3F803C0040", "Counter": "0,1,2,3", "EventName": 
"OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -958,12 +955,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3", - "MSRValue": "0x3f803c0080", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -971,12 +968,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3", - "MSRValue": "0x3f803c0100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -984,12 +981,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3", - "MSRValue": "0x3f803c0200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -997,12 +994,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the 
snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1010,12 +1007,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0091", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1023,12 +1020,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1036,12 +1033,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", 
"UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c0122", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1049,12 +1046,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c0244", + "BriefDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C0244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1062,12 +1059,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "MSRValue": "0x04003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "MSRValue": "0x04003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is 
not forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1075,12 +1072,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "MSRValue": "0x10003c07f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "MSRValue": "0x10003C07F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1088,12 +1085,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that hit in the L3", - "MSRValue": "0x3f803c8fff", + "BriefDescription": "Counts all requests hit in the L3", + "MSRValue": "0x3F803C8FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests hit in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/memory.json b/tools/perf/pmu-events/arch/x86/haswellx/memory.json index 56b0f24b8029b..a42d5ce86b6f4 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/memory.json @@ -291,7 +291,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 4.", + "BriefDescription": "Randomly selected loads with latency value being above 4.", "PEBS": "2", "MSRValue": "0x4", "Counter": "3", @@ -305,7 +305,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 8.", + "BriefDescription": "Randomly selected loads with latency value being above 8.", "PEBS": "2", "MSRValue": "0x8", "Counter": "3", @@ -319,7 +319,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 16.", + "BriefDescription": "Randomly selected loads with latency value being above 16.", "PEBS": "2", "MSRValue": "0x10", "Counter": "3", @@ -333,7 +333,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 32.", + "BriefDescription": "Randomly selected loads with latency value being above 32.", "PEBS": "2", "MSRValue": "0x20", "Counter": "3", @@ -347,7 +347,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency 
value being above 64.", + "BriefDescription": "Randomly selected loads with latency value being above 64.", "PEBS": "2", "MSRValue": "0x40", "Counter": "3", @@ -361,7 +361,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 128.", + "BriefDescription": "Randomly selected loads with latency value being above 128.", "PEBS": "2", "MSRValue": "0x80", "Counter": "3", @@ -375,7 +375,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 256.", + "BriefDescription": "Randomly selected loads with latency value being above 256.", "PEBS": "2", "MSRValue": "0x100", "Counter": "3", @@ -389,7 +389,7 @@ { "EventCode": "0xCD", "UMask": "0x1", - "BriefDescription": "Loads with latency value being above 512.", + "BriefDescription": "Randomly selected loads with latency value being above 512.", "PEBS": "2", "MSRValue": "0x200", "Counter": "3", @@ -404,12 +404,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss in the L3", - "MSRValue": "0x3fbfc00001", + "BriefDescription": "Counts demand data reads miss in the L3", + "MSRValue": "0x3FBFC00001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -417,12 +417,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400001", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -430,12 +430,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", - "MSRValue": "0x3fbfc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss in the L3", + "MSRValue": "0x3FBFC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -443,12 
+443,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -456,12 +456,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00002", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00002", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -469,12 +469,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss in the L3", - "MSRValue": "0x3fbfc00004", + "BriefDescription": "Counts all demand code reads miss in the L3", + "MSRValue": "0x3FBFC00004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -482,12 +482,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand code reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400004", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine 
mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -495,12 +495,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3", - "MSRValue": "0x3fbfc00010", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", + "MSRValue": "0x3FBFC00010", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -508,12 +508,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3", - "MSRValue": "0x3fbfc00020", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", + "MSRValue": "0x3FBFC00020", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -521,12 +521,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3", - "MSRValue": "0x3fbfc00040", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", + "MSRValue": "0x3FBFC00040", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -534,12 +534,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3", - "MSRValue": "0x3fbfc00080", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", + "MSRValue": "0x3FBFC00080", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring 
data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -547,12 +547,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3", - "MSRValue": "0x3fbfc00100", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", + "MSRValue": "0x3FBFC00100", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -560,12 +560,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3", - "MSRValue": "0x3fbfc00200", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", + "MSRValue": "0x3FBFC00200", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -573,12 +573,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3", - "MSRValue": "0x3fbfc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss in the L3", + "MSRValue": "0x3FBFC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -586,12 +586,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400091", "Counter": "0,1,2,3", 
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -599,12 +599,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063f800091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063F800091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -612,12 +612,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -625,12 +625,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x083fc00091", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x083FC00091", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -638,12 +638,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3", - "MSRValue": "0x3fbfc00122", + "BriefDescription": "Counts all demand & prefetch RFOs miss in the L3", + "MSRValue": "0x3FBFC00122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -651,12 +651,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400122", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -664,12 +664,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3", - "MSRValue": "0x3fbfc00244", + "BriefDescription": "Counts all demand & prefetch code reads miss in the L3", + "MSRValue": "0x3FBFC00244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -677,12 +677,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "MSRValue": "0x0600400244", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed 
only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -690,12 +690,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3", - "MSRValue": "0x3fbfc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", + "MSRValue": "0x3FBFC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -703,12 +703,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram", - "MSRValue": "0x06004007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", + "MSRValue": "0x06004007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from local dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -716,12 +716,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram", - "MSRValue": "0x063f8007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", + "MSRValue": "0x063F8007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the data is returned from remote dram", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -729,12 +729,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & 
prefetch) that miss the L3 and the modified data is transferred from remote cache", - "MSRValue": "0x103fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", + "MSRValue": "0x103FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and the modified data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -742,12 +742,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache", - "MSRValue": "0x083fc007f7", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", + "MSRValue": "0x083FC007F7", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) miss the L3 and clean or shared data is transferred from remote cache", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -755,12 +755,12 @@ "Offcore": "1", "EventCode": "0xB7, 0xBB", "UMask": "0x1", - "BriefDescription": "Counts all requests that miss in the L3", - "MSRValue": "0x3fbfc08fff", + "BriefDescription": "Counts all requests miss in the L3", + "MSRValue": "0x3FBFC08FFF", "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests miss in the L3", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json index 8a18bfe9e3e4d..26f2888341ee0 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", "Counter": "Fixed counter 0", @@ -11,7 +10,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state.", "Counter": "Fixed counter 1", @@ -21,7 +19,6 @@ 
"CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "Fixed counter 1", @@ -31,7 +28,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", "Counter": "Fixed counter 2", @@ -1098,6 +1094,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.ALL", + "PublicDescription": "Counts the number of micro-ops retired. Use Cmask=1 and invert to count active cycles or stalled cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1142,6 +1139,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1201,6 +1199,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PublicDescription": "Counts the number of conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1241,6 +1240,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PublicDescription": "Counts the number of near return instructions retired.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1261,6 +1261,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near taken branches retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -1312,6 +1313,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were taken but mispredicted.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, -- GitLab From 009edd9ae0e77e5cbc6d8767e02be15b4e8b55c5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:40:32 -0700 Subject: [PATCH 0440/1861] perf vendor events intel: Update IvyTown events to v20 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/ivytown/pipeline.json | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json index 0afbfd95ea306..2a0aad91d83d0 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -9,7 +8,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -19,7 +17,6 @@ }, { "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -29,7 +26,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", -- GitLab From 
e6b32be4455f57884bf2a312692c1f4135e35e02 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:40:51 -0700 Subject: [PATCH 0441/1861] perf vendor events intel: Update JakeTown events to v20 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/jaketown/cache.json | 6 +++--- .../perf/pmu-events/arch/x86/jaketown/pipeline.json | 12 ++++-------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/jaketown/cache.json b/tools/perf/pmu-events/arch/x86/jaketown/cache.json index ee22e4a5e30d2..52dc6ef40e635 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/cache.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/cache.json @@ -31,7 +31,7 @@ }, { "PEBS": "1", - "PublicDescription": "This event counts line-split load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", @@ -42,7 +42,7 @@ }, { "PEBS": "1", - "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", @@ -179,7 +179,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ", + "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.", "EventCode": "0x51", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json index 34a519d9bfa04..783a5b4a67b19 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json @@ -1,7 +1,6 @@ [ { - "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers.", "Counter": "Fixed counter 1", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -10,8 +9,7 @@ "CounterHTOff": "Fixed counter 1" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. 
The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "Counter": "Fixed counter 2", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -20,8 +18,7 @@ "CounterHTOff": "Fixed counter 2" }, { - "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", + "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "Counter": "Fixed counter 3", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -778,7 +775,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. 
The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", + "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", "EventCode": "0x03", "Counter": "0,1,2,3", "UMask": "0x2", @@ -1098,7 +1095,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x2", "AnyThread": "1", -- GitLab From 59da390e54a4bfa674c6a61727a5e0243ba0ad74 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:41:12 -0700 Subject: [PATCH 0442/1861] perf vendor events intel: Update SandyBridge events to v16 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/sandybridge/cache.json | 680 ++++----- .../arch/x86/sandybridge/floating-point.json | 126 +- .../arch/x86/sandybridge/frontend.json | 268 ++-- .../arch/x86/sandybridge/memory.json | 68 +- .../arch/x86/sandybridge/other.json | 18 +- .../arch/x86/sandybridge/pipeline.json | 1338 +++++++++-------- .../arch/x86/sandybridge/virtual-memory.json | 108 +- 7 files changed, 1306 insertions(+), 1300 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/cache.json b/tools/perf/pmu-events/arch/x86/sandybridge/cache.json index 16b04a20bc12c..bb79e89c2049d 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/cache.json @@ -1,207 +1,200 @@ [ { - "PEBS": "1", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x11", - "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts line-split load uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3" + "UMask": "0xc", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of load uops retired", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of store uops retired.", - "EventCode": "0xD0", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x20", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x30", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x40", + "EventName": "L2_RQSTS.PF_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required.", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", - "SampleAfterValue": "50021", - "BriefDescription": "Retired 
load uops which data sources were data hits in LLC without snoops required.", - "CounterHTOff": "0,1,2,3" + "UMask": "0x80", + "EventName": "L2_RQSTS.PF_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD1", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", - "CounterHTOff": "0,1,2,3" + "UMask": "0xc0", + "EventName": "L2_RQSTS.ALL_PF", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from L2 hardware prefetchers.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD2", + "EventCode": "0x27", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a non-modified state.", - "EventCode": "0xD2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_STORE_LOCK_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that miss cache lines.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. 
In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2.", - "EventCode": "0xD2", + "EventCode": "0x27", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_STORE_LOCK_RQSTS.HIT_E", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that hit cache lines in E state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD2", + "EventCode": "0x27", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_STORE_LOCK_RQSTS.HIT_M", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that hit cache lines in M state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts retired demand loads that missed the last-level (L3) cache. This means that the load is usually satisfied from memory in a client system or possibly from the remote socket in a server. Demand loads are non speculative load uops.", - "EventCode": "0xD4", + "EventCode": "0x27", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with unknown information as data source in cache serviced the load.", - "CounterHTOff": "0,1,2,3" + "UMask": "0xf", + "EventName": "L2_STORE_LOCK_RQSTS.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "RFOs that access cache lines in any state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ", - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "L1D.REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data line replacements.", + "EventName": "L2_L1D_WB_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. 
(Non-rejected WBs from the DCU.).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "L1D.ALLOCATED_IN_M", - "SampleAfterValue": "2000003", - "BriefDescription": "Allocated L1D data cache lines in M state.", + "EventName": "L2_L1D_WB_RQSTS.HIT_S", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "L1D.EVICTION", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data cache lines in M state evicted due to replacement.", + "EventName": "L2_L1D_WB_RQSTS.HIT_E", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x51", + "EventCode": "0x28", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "L1D.ALL_M_REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.", + "EventName": "L2_L1D_WB_RQSTS.HIT_M", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x28", + "Counter": "0,1,2,3", + "UMask": "0xf", + "EventName": "L2_L1D_WB_RQSTS.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x41", + "EventName": "LONGEST_LAT_CACHE.MISS", + "SampleAfterValue": "100003", + "BriefDescription": "Core-originated cacheable demand requests missed LLC.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x4f", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "SampleAfterValue": "100003", + "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -224,12 +217,61 @@ "CounterHTOff": "2" }, { - "EventCode": "0x63", + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION", + "EventName": "L1D_PEND_MISS.FB_FULL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when L1D is locked.", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts L1D data line replacements. 
Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier.", + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "L1D.REPLACEMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D data line replacements.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D.ALLOCATED_IN_M", + "SampleAfterValue": "2000003", + "BriefDescription": "Allocated L1D data cache lines in M state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "L1D.EVICTION", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D data cache lines in M state evicted due to replacement.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "L1D.ALL_M_REPLACEMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -251,6 +293,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "Counter": "0,1,2,3", @@ -260,6 +312,16 @@ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "Counter": "0,1,2,3", @@ -279,6 +341,15 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x63", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when L1D is locked.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xB0", "Counter": "0,1,2,3", @@ -325,148 +396,182 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", + "EventCode": "0xBF", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache.", + "UMask": "0x5", + "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES", + "SampleAfterValue": "100003", + "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x11", + "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops that miss the STLB. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x12", + "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", + "SampleAfterValue": "100003", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "EventCode": "0xD0", + "Counter": "0,1,2,3", + "UMask": "0x21", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K). (Precise Event - PEBS)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x41", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K). (Precise Event - PEBS)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x42", + "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", + "SampleAfterValue": "100003", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts the number of load uops retired (Precise Event)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "L2_RQSTS.PF_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x81", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired load uops. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x24", + "PEBS": "1", + "PublicDescription": "This event counts the number of store uops retired. (Precise Event - PEBS)", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "L2_RQSTS.PF_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x82", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired store uops. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "L2_STORE_LOCK_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that miss cache lines.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_STORE_LOCK_RQSTS.HIT_E", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that hit cache lines in E state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required. (Precise Event - PEBS)", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "L2_STORE_LOCK_RQSTS.HIT_M", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that hit cache lines in M state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", + "SampleAfterValue": "50021", + "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x27", + "PEBS": "1", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0xf", - "EventName": "L2_STORE_LOCK_RQSTS.ALL", - "SampleAfterValue": "200003", - "BriefDescription": "RFOs that access cache lines in any state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40", + "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "L2_L1D_WB_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.).", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a non-modified state. 
(Precise Event - PEBS)", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "L2_L1D_WB_RQSTS.HIT_S", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2. (Precise Event - PEBS)", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "L2_L1D_WB_RQSTS.HIT_E", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "L2_L1D_WB_RQSTS.HIT_M", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x28", + "PEBS": "1", + "PublicDescription": "This event counts retired demand loads that missed the last-level (L3) cache. This means that the load is usually satisfied from memory in a client system or possibly from the remote socket in a server. Demand loads are non speculative load uops. (Precise Event - PEBS)", + "EventCode": "0xD4", "Counter": "0,1,2,3", - "UMask": "0xf", - "EventName": "L2_L1D_WB_RQSTS.ALL", - "SampleAfterValue": "200003", - "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2", + "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load uops with unknown information as data source in cache serviced the load. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xF0", @@ -622,24 +727,6 @@ "BriefDescription": "Dirty L2 cache lines filling the L2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "LONGEST_LAT_CACHE.MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests missed LLC.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x4f", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0xF4", "Counter": "0,1,2,3", @@ -649,93 +736,6 @@ "BriefDescription": "Split locks in SQ.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc0", - "EventName": "L2_RQSTS.ALL_PF", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from L2 hardware prefetchers.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xBF", - "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES", - "SampleAfterValue": "100003", - "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0244", @@ -1825,7 +1825,7 @@ 
"EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DATA_INTO_CORE and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = DATA_INTO_CORE and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" }, { @@ -1837,7 +1837,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_M.HITM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_M and SNOOP = HITM", + "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_M and SNOOP = HITM", "CounterHTOff": "0,1,2,3" }, { @@ -1849,7 +1849,7 @@ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_RFO and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" }, { @@ -1861,7 +1861,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = PF_LLC_DATA_RD and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" }, { @@ -1873,7 +1873,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = ANY_RESPONSE", + "BriefDescription": "REQUEST = PF_LLC_IFETCH and RESPONSE = ANY_RESPONSE", "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json b/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json index 982eda48785ec..ce26537c7d47f 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json @@ -1,67 +1,4 @@ [ - { - "EventCode": "0xC1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OTHER_ASSISTS.AVX_STORE", - "SampleAfterValue": "100003", - "BriefDescription": "Number of GSSE memory assist for stores. 
GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OTHER_ASSISTS.AVX_TO_SSE", - "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "OTHER_ASSISTS.SSE_TO_AVX", - "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "FP_ASSIST.X87_OUTPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to output value.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "FP_ASSIST.X87_INPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to input value.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "FP_ASSIST.SIMD_OUTPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to Output values.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCA", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "FP_ASSIST.SIMD_INPUT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to input values.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x10", "Counter": "0,1,2,3", @@ -125,6 +62,69 @@ "BriefDescription": "Number of AVX-256 Computational FP double precision uops issued this cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "OTHER_ASSISTS.AVX_STORE", + "SampleAfterValue": "100003", + "BriefDescription": "Number of GSSE memory assist for stores. 
GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OTHER_ASSISTS.AVX_TO_SSE", + "SampleAfterValue": "100003", + "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "OTHER_ASSISTS.SSE_TO_AVX", + "SampleAfterValue": "100003", + "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "FP_ASSIST.X87_OUTPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of X87 assists due to output value.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "FP_ASSIST.X87_INPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of X87 assists due to input value.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "FP_ASSIST.SIMD_OUTPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of SIMD FP assists due to Output values.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "FP_ASSIST.SIMD_INPUT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of SIMD FP assists due to input values.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xCA", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json b/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json index 1b7b1dd36c68e..e58ed14a204cc 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json @@ -1,23 +1,4 @@ [ - { - "EventCode": "0x80", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ICACHE.HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. 
Counting includes unchacheable accesses.", - "EventCode": "0x80", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ICACHE.MISSES", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x79", "Counter": "0,1,2,3", @@ -39,158 +20,200 @@ { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more information.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled. In the ideal case 4 uops can be delivered each cycle. 
The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them. This event is used in determining the front-end bound category of the top-down pipeline slots characterization.", - "EventCode": "0x9C", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "UMask": "0x10", + "EdgeDetect": "1", + "EventName": "IDQ.MS_DSB_OCCUR", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled .", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x9C", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.", "CounterMask": "4", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x9C", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xAB", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DSB2MITE_SWITCHES.COUNT", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline. It excludes cycles when the back-end cannot accept new micro-ops. 
The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.", - "EventCode": "0xAB", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", + "BriefDescription": "Cycles MITE is delivering 4 Uops.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xAC", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "DSB_FILL.OTHER_CANCEL", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.", + "BriefDescription": "Cycles MITE is delivering any Uop.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xAC", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "DSB_FILL.EXCEED_DSB_LINES", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance. 
See the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual for more information.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_CYCLES", + "UMask": "0x3c", + "EventName": "IDQ.MITE_ALL_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x79", + "EventCode": "0x80", "Counter": "0,1,2,3", - "UMask": "0x10", - "EdgeDetect": "1", - "EventName": "IDQ.MS_DSB_OCCUR", + "UMask": "0x1", + "EventName": "ICACHE.HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.", - "CounterMask": "1", + "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes unchacheable accesses.", + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "ICACHE.MISSES", + "SampleAfterValue": "200003", + "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled. In the ideal case 4 uops can be delivered each cycle. The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them. 
This event is used in determining the front-end bound category of the top-down pipeline slots characterization.", + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled .", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0x9C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x9C", "Counter": "0,1,2,3", @@ -223,83 +246,60 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", + "EventCode": "0x9C", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "UMask": "0x1", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x79", + "EventCode": "0xAB", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "UMask": "0x1", + "EventName": "DSB2MITE_SWITCHES.COUNT", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering 4 Uops.", - "CounterMask": "4", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x79", + "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline. It excludes cycles when the back-end cannot accept new micro-ops. 
The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.", + "EventCode": "0xAB", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "UMask": "0x2", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering any Uop.", - "CounterMask": "1", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xAC", "Counter": "0,1,2,3", - "UMask": "0xa", - "EventName": "DSB_FILL.ALL_CANCEL", + "UMask": "0x2", + "EventName": "DSB_FILL.OTHER_CANCEL", "SampleAfterValue": "2000003", - "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.", + "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x9C", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x79", + "EventCode": "0xAC", "Counter": "0,1,2,3", - "UMask": "0x3c", - "EventName": "IDQ.MITE_ALL_UOPS", + "UMask": "0x8", + "EventName": "DSB_FILL.EXCEED_DSB_LINES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.", + "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x79", + "EventCode": "0xAC", "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", + "UMask": "0xa", + "EventName": "DSB_FILL.ALL_CANCEL", "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "CounterMask": "1", + "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/memory.json b/tools/perf/pmu-events/arch/x86/sandybridge/memory.json index e6dfa89d00f3f..78c1a987f9a22 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/memory.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/memory.json @@ -1,4 +1,31 @@ [ + { + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "MISALIGN_MEM_REF.LOADS", + "SampleAfterValue": "2000003", + "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "MISALIGN_MEM_REF.STORES", + "SampleAfterValue": "2000003", + "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xBE", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "PAGE_WALKS.LLC_MISS", + "SampleAfterValue": "100003", + "BriefDescription": "Number of any page walk that had a miss in LLC. 
Does not necessary cause a SUSPEND.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from memory disambiguation, external snoops, or cross SMT-HW-thread snoop (stores) hitting load buffers. Machine clears can have a significant performance impact if they are happening frequently.", "EventCode": "0xC3", @@ -125,33 +152,6 @@ "TakenAlone": "1", "CounterHTOff": "3" }, - { - "EventCode": "0xBE", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "PAGE_WALKS.LLC_MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Number of any page walk that had a miss in LLC. Does not necessary cause a SUSPEND.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MISALIGN_MEM_REF.LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MISALIGN_MEM_REF.STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0xB7, 0xBB", "MSRValue": "0x300400244", @@ -367,7 +367,7 @@ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = ANY_REQUEST and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -379,7 +379,7 @@ "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS_LOCAL.ANY_LLC_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DATA_IN_SOCKET and RESPONSE = LLC_MISS_LOCAL and SNOOP = ANY_LLC_HIT", + "BriefDescription": "REQUEST = DATA_IN_SOCKET and RESPONSE = LLC_MISS_LOCAL and SNOOP = ANY_LLC_HIT", "CounterHTOff": "0,1,2,3" }, { @@ -391,7 +391,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = DEMAND_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -403,7 +403,7 @@ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -415,7 +415,7 @@ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_RFO and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -427,7 +427,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_LLC_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = 
DRAM", "CounterHTOff": "0,1,2,3" }, { @@ -439,7 +439,7 @@ "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.LLC_MISS_LOCAL.DRAM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", + "BriefDescription": "REQUEST = PF_LLC_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM", "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/other.json b/tools/perf/pmu-events/arch/x86/sandybridge/other.json index 64b195b82c502..874eb40a2e0f7 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/other.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/other.json @@ -8,6 +8,15 @@ "BriefDescription": "Valid instructions written to IQ per cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x4E", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "HW_PRE_REQ.DL1_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is being counted each time it access the cache & miss it, including if a block is applicable or if hit the Fill Buffer for .", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x5C", "Counter": "0,1,2,3", @@ -37,15 +46,6 @@ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x4E", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "HW_PRE_REQ.DL1_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is being counted each time it access the cache & miss it, including if a block is applicable or if hit the Fill Buffer for .", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json index 34a519d9bfa04..b7150f65f16d6 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json @@ -1,289 +1,307 @@ [ { - "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ", - "EventCode": "0x00", - "Counter": "Fixed counter 1", + "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. 
It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "Counter": "Fixed counter 2", + "UMask": "0x3", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "CounterHTOff": "Fixed counter 2" + }, + { + "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers.", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ", - "EventCode": "0x00", - "Counter": "Fixed counter 2", + "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" }, { - "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. 
", - "EventCode": "0x00", - "Counter": "Fixed counter 3", - "UMask": "0x3", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x88", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Not taken macro-conditional branches.", + "UMask": "0x1", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "SampleAfterValue": "100003", + "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired macro-conditional branches.", + "UMask": "0x2", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "100003", + "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.", + "UMask": "0x8", + "EventName": "LD_BLOCKS.NO_SR", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.", + "UMask": "0x10", + "EventName": "LD_BLOCKS.ALL_BLOCK", + "SampleAfterValue": "100003", + "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. 
This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline. The enhanced address check typically has a performance penalty of 5 cycles.", + "EventCode": "0x07", "Counter": "0,1,2,3", - "UMask": "0x88", - "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.", + "UMask": "0x1", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "SampleAfterValue": "100003", + "BriefDescription": "False dependencies in MOB due to partial compare.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x07", "Counter": "0,1,2,3", - "UMask": "0x90", - "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired direct near calls.", + "UMask": "0x8", + "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired indirect calls.", + "UMask": "0x3", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc1", - "EventName": "BR_INST_EXEC.ALL_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired macro-conditional branches.", + "UMask": "0x3", + "EdgeDetect": "1", + "EventName": "INT_MISC.RECOVERY_STALLS_COUNT", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc2", - "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0xc4", - "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.", + "UMask": "0x40", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeilne to the back-end.", + "EventCode": "0x0E", "Counter": "0,1,2,3", - "UMask": "0xc8", - "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired indirect return branches.", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "EventCode": "0x0E", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0xd0", - "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired direct near calls.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0x0E", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0x14", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.", + "UMask": "0x1", + "EventName": "ARITH.FPU_DIV_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when divider is busy executing divide operations.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "PublicDescription": "This event counts the number of the divide operations executed.", + "EventCode": "0x14", "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "ARITH.FPU_DIV", + "SampleAfterValue": "100003", + "BriefDescription": "Divide operations executed.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", 
+ "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x88", - "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Thread cycles when thread is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x90", - "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted direct near calls.", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "UMask": "0x1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xc1", - "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xc4", - "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", - "SampleAfterValue": "200003", - "BriefDescription": "Mispredicted indirect branches excluding calls and returns.", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xd0", - "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired mispredicted direct near calls.", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.UOPS", + "UMask": "0x2", + "EventName": 
"CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA8", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "1", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x87", + "EventCode": "0x4C", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "ILD_STALL.LCP", - "SampleAfterValue": "2000003", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "EventName": "LOAD_HIT_PRE.SW_PF", + "SampleAfterValue": "100003", + "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x87", + "EventCode": "0x4C", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ILD_STALL.IQ_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Stall cycles because IQ is full.", + "UMask": "0x2", + "EventName": "LOAD_HIT_PRE.HW_PF", + "SampleAfterValue": "100003", + "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "EventCode": "0x59", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "INT_MISC.RAT_STALL_CYCLES", + "UMask": "0x20", + "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.", + "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, See the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual.", "EventCode": "0x59", "Counter": "0,1,2,3", "UMask": "0x20", - "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP", + "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.", + "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are or uops with base, index, and offset source operands using base and index reqisters, where base is EBR/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel? 
64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.", + "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are or uops with base, index, and offset source operands using base and index reqisters, where base is EBR/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.", "EventCode": "0x59", "Counter": "0,1,2,3", "UMask": "0x40", @@ -302,48 +320,21 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "RESOURCE_STALLS.LB", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts the cycles of stall due to lack of load buffers.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RESOURCE_STALLS.RS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", + "EventCode": "0x5B", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "UMask": "0xc", + "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. 
(not including draining form sync).", + "BriefDescription": "Cycles with either free list is empty.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0x5B", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "RESOURCE_STALLS.ROB", + "UMask": "0xf", + "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Resource stalls2 control structures full for physical registers.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -356,577 +347,609 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeilne to the back-end.", - "EventCode": "0x0E", + "EventCode": "0x5B", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.ANY", + "UMask": "0x4f", + "EventName": "RESOURCE_STALLS2.OOO_RSRC", "SampleAfterValue": "2000003", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).", + "BriefDescription": "Resource stalls out of order resources full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", - "Invert": "1", + "EventCode": "0x5E", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "EventName": "RS_EVENTS.EMPTY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", + "EventCode": "0x5E", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", + "EventCode": "0x87", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "RS_EVENTS.EMPTY_CYCLES", + "EventName": "ILD_STALL.LCP", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xCC", + "EventCode": "0x87", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "UMask": "0x4", + "EventName": "ILD_STALL.IQ_FULL", "SampleAfterValue": "2000003", - "BriefDescription": "Count cases of saving new LBR.", + "BriefDescription": "Stall cycles because IQ is full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. 
Machine clears can have a significant performance impact if they are happening frequently.", - "EventCode": "0xC3", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", + "UMask": "0x41", + "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Not taken macro-conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "EventCode": "0xC3", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MACHINE_CLEARS.MASKMOV", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "UMask": "0x81", + "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired macro-conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "INST_RETIRED.ANY_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event.", + "UMask": "0x82", + "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of micro-ops retired.", - "EventCode": "0xC2", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.ALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops.", + "UMask": "0x84", + "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle. 
This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization.", - "EventCode": "0xC2", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "UMask": "0x88", + "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "Invert": "1", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x90", + "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired direct near calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "Invert": "1", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", - "CounterHTOff": "0,1,2,3" + "UMask": "0xa0", + "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired indirect calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", + "UMask": "0xc1", + "EventName": "BR_INST_EXEC.ALL_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired macro-conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", + "UMask": "0xc2", + "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", + "UMask": "0xc4", + "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", + "UMask": "0xc8", + "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired indirect return branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": 
"0,1,2,3", - "UMask": "0x10", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "UMask": "0xd0", + "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired direct near calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC4", + "EventCode": "0x88", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", + "UMask": "0xff", + "EventName": "BR_INST_EXEC.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", + "UMask": "0x41", + "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "EventCode": "0xC4", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS).", - "CounterHTOff": "0,1,2,3" + "UMask": "0x81", + "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", + "UMask": "0x84", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect mispredicted near call instructions retired.", + "UMask": "0x88", + "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", + "UMask": "0x90", + "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted direct near calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BR_MISP_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted not taken 
branch instructions retired.", + "UMask": "0xa0", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted taken branch instructions retired.", + "UMask": "0xc1", + "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", - "EventCode": "0xC5", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS).", - "CounterHTOff": "0,1,2,3" + "UMask": "0xc4", + "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET", + "SampleAfterValue": "200003", + "BriefDescription": "Mispredicted indirect branches excluding calls and returns.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED", - "SampleAfterValue": "100003", - "BriefDescription": "Retired instructions experiencing ITLB misses.", + "UMask": "0xd0", + "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired mispredicted direct near calls.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x14", + "EventCode": "0x89", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ARITH.FPU_DIV_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when divider is busy executing divide operations.", + "UMask": "0xff", + "EventName": "BR_MISP_EXEC.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of the divide operations executed.", - "EventCode": "0x14", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "ARITH.FPU_DIV", - "SampleAfterValue": "100003", - "BriefDescription": "Divide operations executed.", - "CounterMask": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are dispatched to port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_DISPATCHED.THREAD", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched per thread.", + "BriefDescription": "Cycles per core when uops are dispatched to port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_DISPATCHED.CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched from any thread.", + "BriefDescription": "Cycles per thread when uops are 
dispatched to port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 0.", + "BriefDescription": "Cycles per core when uops are dispatched to port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0xc", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 1.", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0xc", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 4.", + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x30", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 5.", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH", + "UMask": "0x30", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connect to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", - "CounterMask": "2", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are dispatched to port 4.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. 
Note this is in MLC and connected to Umask 0.", - "CounterMask": "1", + "BriefDescription": "Cycles per core when uops are dispatched to port 4.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", - "CounterMask": "6", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are dispatched to port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "UMask": "0x80", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per core when uops are dispatched to port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x4C", + "EventCode": "0xA2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LOAD_HIT_PRE.SW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.", + "EventName": "RESOURCE_STALLS.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource-related stall cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x4C", + "EventCode": "0xA2", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LOAD_HIT_PRE.HW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.", + "EventName": "RESOURCE_STALLS.LB", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts the cycles of stall due to lack of load buffers.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x03", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LD_BLOCKS.DATA_UNKNOWN", - "SampleAfterValue": "100003", - "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.", + "UMask": "0x4", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. 
The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.", - "EventCode": "0x03", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "SampleAfterValue": "100003", - "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.", + "UMask": "0x8", + "EventName": "RESOURCE_STALLS.SB", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x03", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.NO_SR", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "UMask": "0xa", + "EventName": "RESOURCE_STALLS.LB_SB", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource stalls due to load or store buffers all being in use.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x03", + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "RESOURCE_STALLS.MEM_RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "LD_BLOCKS.ALL_BLOCK", - "SampleAfterValue": "100003", - "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).", + "EventName": "RESOURCE_STALLS.ROB", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline. The enhanced address check typically has a performance penalty of 5 cycles.", - "EventCode": "0x07", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", - "SampleAfterValue": "100003", - "BriefDescription": "False dependencies in MOB due to partial compare.", + "UMask": "0xf0", + "EventName": "RESOURCE_STALLS.OOO_RSRC", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x07", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. 
Note this is in MLC and connected to Umask 0.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB6", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", + "CounterMask": "2", + "CounterHTOff": "2" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH", + "SampleAfterValue": "2000003", + "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connect to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.", + "CounterMask": "5", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.", + "CounterMask": "6", + "CounterHTOff": "2" + }, + { + "EventCode": "0xA8", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "AGU_BYPASS_CANCEL.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. 
the address specified in the base register is in one page and the address [base+offset] is in an.", + "EventName": "LSD.UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of Uops delivered by the LSD.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xA8", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", + "EventName": "UOPS_DISPATCHED.THREAD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 0.", + "BriefDescription": "Uops dispatched per thread.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x2", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", + "EventName": "UOPS_DISPATCHED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 1.", + "BriefDescription": "Uops dispatched from any thread.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 4.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x80", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 5.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", "Counter": "0,1,2,3", 
- "UMask": "0x30", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0xc", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xB6", "Counter": "0,1,2,3", - "UMask": "0x30", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", + "UMask": "0x1", + "EventName": "AGU_BYPASS_CANCEL.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. the address specified in the base register is in one page and the address [base+offset] is in an.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "INST_RETIRED.ANY_P", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "BriefDescription": "Number of instructions retired. General Counter - architectural event.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -941,280 +964,263 @@ "CounterHTOff": "1" }, { - "EventCode": "0x5B", + "EventCode": "0xC1", "Counter": "0,1,2,3", - "UMask": "0xf", - "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL", - "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls2 control structures full for physical registers.", + "UMask": "0x2", + "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED", + "SampleAfterValue": "100003", + "BriefDescription": "Retired instructions experiencing ITLB misses.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5B", + "PEBS": "1", + "PublicDescription": "This event counts the number of micro-ops retired. (Precise Event)", + "EventCode": "0xC2", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with either free list is empty.", + "BriefDescription": "Actually retired uops. 
(Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "RESOURCE_STALLS.MEM_RS", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles without actually retired uops.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA2", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0xf0", - "EventName": "RESOURCE_STALLS.OOO_RSRC", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5B", - "Counter": "0,1,2,3", - "UMask": "0x4f", - "EventName": "RESOURCE_STALLS2.OOO_RSRC", - "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls out of order resources full.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", - "Counter": "0,1,2,3", - "UMask": "0xa", - "EventName": "RESOURCE_STALLS.LB_SB", - "SampleAfterValue": "2000003", - "BriefDescription": "Resource stalls due to load or store buffers all being in use.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x0D", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", + "BriefDescription": "Cycles without actually retired uops.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, See the Intel? 64 and IA-32 Architectures Optimization Reference Manual.", - "EventCode": "0x59", + "PEBS": "1", + "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle. This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization. (Precise Event - PEBS)", + "EventCode": "0xC2", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", "SampleAfterValue": "2000003", - "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.", - "CounterMask": "1", + "BriefDescription": "Retirement slots used. 
(Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "EventCode": "0xc3", "Counter": "0,1,2,3", - "UMask": "0x3", + "UMask": "0x1", "EdgeDetect": "1", - "EventName": "INT_MISC.RECOVERY_STALLS_COUNT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xE6", + "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "BACLEARS.ANY", + "UMask": "0x4", + "EventName": "MACHINE_CLEARS.SMC", "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "BriefDescription": "Self-modifying code (SMC) detected.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x88", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "BR_INST_EXEC.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired branches.", + "UMask": "0x20", + "EventName": "MACHINE_CLEARS.MASKMOV", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x89", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "BR_MISP_EXEC.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Speculative and retired mispredicted macro conditional branches.", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "Invert": "1", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Conditional branch instructions retired. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xc3", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", - "Invert": "1", + "PEBS": "2", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "PEBS": "1", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "SampleAfterValue": "100007", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "UMask": "0x10", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "UMask": "0x20", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Taken branch instructions retired. 
(Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterMask": "1", + "UMask": "0x40", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "100007", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", + "UMask": "0x0", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect mispredicted near call instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "2", + "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted macro branch instructions retired. 
(Precise Event - PEBS).", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB1", - "Invert": "1", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "UMask": "0x10", + "EventName": "BR_MISP_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted not taken branch instructions retired.(Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "EventCode": "0x3C", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted taken branch instructions retired. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xCC", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "BriefDescription": "Count cases of saving new LBR.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xE6", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "UMask": "0x1f", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json b/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json index a654ab771fce7..b8eccce5d75d4 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json @@ -1,131 +1,131 @@ [ { - "EventCode": "0xAE", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB.ITLB_FLUSH", - "SampleAfterValue": "100007", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x4F", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EPT.WALK_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycle count for an Extended Page table walk. 
The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", "SampleAfterValue": "100003", - "BriefDescription": "Misses at all ITLB levels that cause page walks.", + "BriefDescription": "Load misses in all DTLB levels that cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x85", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "ITLB_MISSES.WALK_COMPLETED", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event count cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.", - "EventCode": "0x85", + "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "ITLB_MISSES.WALK_DURATION", + "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", "SampleAfterValue": "2000003", "BriefDescription": "Cycles when PMH is busy with page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x85", + "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. The penalty in this case is approximately 7 cycles.", + "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "ITLB_MISSES.STLB_HIT", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", "SampleAfterValue": "100003", - "BriefDescription": "Load misses in all DTLB levels that cause page walks.", + "BriefDescription": "Store misses in all DTLB levels that cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.", - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", + "EventName": "DTLB_STORE_MISSES.WALK_DURATION", "SampleAfterValue": "2000003", "BriefDescription": "Cycles when PMH is busy with page walks.", "CounterHTOff": 
"0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. The penalty in this case is approximately 7 cycles.", - "EventCode": "0x08", + "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "EventCode": "0x4F", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "EPT.WALK_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycle count for an Extended Page table walk. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause page walks.", + "BriefDescription": "Misses at all ITLB levels that cause page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "EventName": "ITLB_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "PublicDescription": "This event count cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "DTLB_STORE_MISSES.WALK_DURATION", + "EventName": "ITLB_MISSES.WALK_DURATION", "SampleAfterValue": "2000003", "BriefDescription": "Cycles when PMH is busy with page walks.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "EventName": "ITLB_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xAE", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "ITLB.ITLB_FLUSH", + "SampleAfterValue": "100007", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { -- GitLab From 2111da70ff1094ed65d15f00e6285b668f689b87 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:41:35 -0700 Subject: [PATCH 0443/1861] perf vendor events intel: Update IvyBridge events to v21 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- 
tools/perf/pmu-events/arch/x86/ivybridge/cache.json | 10 +++++----- tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json | 4 ---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json index 999a01bc64670..5f6cb2abc3840 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json @@ -1012,7 +1012,7 @@ "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address ", + "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address", "CounterHTOff": "0,1,2,3" }, { @@ -1036,7 +1036,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads ", + "BriefDescription": "Counts all demand data reads", "CounterHTOff": "0,1,2,3" }, { @@ -1048,7 +1048,7 @@ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand rfo's ", + "BriefDescription": "Counts all demand rfo's", "CounterHTOff": "0,1,2,3" }, { @@ -1084,7 +1084,7 @@ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch prefetch RFOs ", + "BriefDescription": "Counts all demand & prefetch prefetch RFOs", "CounterHTOff": "0,1,2,3" }, { @@ -1096,7 +1096,7 @@ "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) ", + "BriefDescription": "Counts all data/code/rfo references (demand & prefetch)", "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json index 0afbfd95ea306..2a0aad91d83d0 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json @@ -1,6 +1,5 @@ [ { - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -9,7 +8,6 @@ "CounterHTOff": "Fixed counter 0" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -19,7 +17,6 @@ }, { "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -29,7 +26,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", -- GitLab From efc351f1b55070887184549ec3b8b47a3570ae78 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:42:14 -0700 Subject: [PATCH 0444/1861] perf vendor events intel: Update Haswell events to v28 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- 
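Much of this v28 refresh is small field corrections rather than new events; for instance, the UMask of L2_RQSTS.DEMAND_DATA_RD_HIT moves from 0x41 to 0xc1 in the cache.json hunk below, alongside trailing-whitespace cleanups in several BriefDescription strings. When reviewing this kind of update, it can help to diff two revisions of a file by event name instead of by line, so that pure reordering (like the wholesale re-sort in the SandyBridge hunks above) disappears and only real field differences remain. A small sketch, assuming both revisions have been saved locally; old.json and new.json are placeholder names, not part of the patch:

#!/usr/bin/env python3
# Report per-event field changes between two revisions of a pmu-events file.
import json
import sys

def load(path):
    with open(path) as f:
        # Key each entry by its event name for order-independent comparison.
        return {ev["EventName"]: ev for ev in json.load(f) if "EventName" in ev}

old, new = load(sys.argv[1]), load(sys.argv[2])

for name in sorted(old.keys() & new.keys()):
    for field in sorted(old[name].keys() | new[name].keys()):
        before, after = old[name].get(field), new[name].get(field)
        if before != after:
            print(f"{name}.{field}: {before!r} -> {after!r}")

for name in sorted(new.keys() - old.keys()):
    print(f"added: {name}")
for name in sorted(old.keys() - new.keys()):
    print(f"removed: {name}")

Usage: python3 compare_events.py old.json new.json. Duplicate event names would collapse under this keying, so treat the output as a review aid rather than an authoritative diff.
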
.../pmu-events/arch/x86/haswell/cache.json | 175 +++++++++--------- .../arch/x86/haswell/floating-point.json | 33 ++-- .../pmu-events/arch/x86/haswell/memory.json | 172 ++++++++--------- .../pmu-events/arch/x86/haswell/pipeline.json | 33 ++-- 4 files changed, 213 insertions(+), 200 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/haswell/cache.json b/tools/perf/pmu-events/arch/x86/haswell/cache.json index da4d6ddd4f924..7fb0ad8d8ca1d 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswell/cache.json @@ -63,10 +63,10 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0xc1", "Errata": "HSD78", "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", "SampleAfterValue": "200003", @@ -77,7 +77,7 @@ "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x42", + "UMask": "0xc2", "EventName": "L2_RQSTS.RFO_HIT", "SampleAfterValue": "200003", "BriefDescription": "RFO requests that hit L2 cache", @@ -87,7 +87,7 @@ "PublicDescription": "Number of instruction fetches that hit the L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x44", + "UMask": "0xc4", "EventName": "L2_RQSTS.CODE_RD_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 cache hits when fetching instructions, code reads.", @@ -97,7 +97,7 @@ "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x50", + "UMask": "0xd0", "EventName": "L2_RQSTS.L2_PF_HIT", "SampleAfterValue": "200003", "BriefDescription": "L2 prefetch requests that hit L2 cache", @@ -610,7 +610,7 @@ "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -623,7 +623,7 @@ "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -792,7 +792,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "", "EventCode": "0xf4", "Counter": "0,1,2,3", "UMask": "0x10", @@ -802,262 +801,262 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c8fff", + "MSRValue": "0x3F803C8FFF", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all requests that hit in the L3", + "BriefDescription": "Counts all requests hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c07f7", + "MSRValue": "0x10003C07F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c07f7", + "MSRValue": "0x04003C07F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand 
& prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0244", + "MSRValue": "0x04003C0244", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand & prefetch code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0122", + "MSRValue": "0x10003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0122", + "MSRValue": "0x04003C0122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand & prefetch RFOs hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not 
forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091", + "MSRValue": "0x10003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0091", + "MSRValue": "0x04003C0091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand & prefetch data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0200", + "MSRValue": "0x3F803C0200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC 
only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0100", + "MSRValue": "0x3F803C0100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0080", + "MSRValue": "0x3F803C0080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0040", + "MSRValue": "0x3F803C0040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0020", + "MSRValue": "0x3F803C0020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit 
in the L3", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0010", + "MSRValue": "0x3F803C0010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0004", + "MSRValue": "0x10003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0004", + "MSRValue": "0x04003C0004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand code reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the 
snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002", + "MSRValue": "0x10003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0002", + "MSRValue": "0x04003C0002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001", + "MSRValue": "0x10003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" 
}, { - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04003c0001", + "MSRValue": "0x04003C0001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "BriefDescription": "Counts demand data reads hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json index f9843e5a9b429..f5a3beaa19fc8 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json @@ -1,22 +1,26 @@ [ { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "HSD56, HSM57", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.", + "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "HSD56, HSM57", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "SampleAfterValue": "100003", - "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", + "BriefDescription": "Number of transitions from legacy SSE to AVX-256 when penalty applicable", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -30,53 +34,58 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of X87 FP assists due to output values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "FP_ASSIST.X87_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to output value.", + "BriefDescription": "output - Numeric Overflow, Numeric Underflow, Inexact Result", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of X87 FP assists due to input values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "FP_ASSIST.X87_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of X87 assists due to input value.", + "BriefDescription": "input - Invalid Operation, Denormal Operand, SNaN Operand", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of SIMD FP assists due to output values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "FP_ASSIST.SIMD_OUTPUT", "SampleAfterValue": "100003", - "BriefDescription": 
"Number of SIMD FP assists due to Output values", + "BriefDescription": "SSE* FP micro-code assist when output value is invalid.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of SIMD FP assists due to input values.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "FP_ASSIST.SIMD_INPUT", "SampleAfterValue": "100003", - "BriefDescription": "Number of SIMD FP assists due to input values", + "BriefDescription": "Any input SSE* FP Assist", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with any input/output SSE* or FP assists.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", "EventName": "FP_ASSIST.ANY", "SampleAfterValue": "100003", - "BriefDescription": "Cycles with any input/output SSE or FP assist", + "BriefDescription": "Counts any FP_ASSIST umask was incrementing", "CounterMask": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/memory.json b/tools/perf/pmu-events/arch/x86/haswell/memory.json index e5f9fa6655b30..ef13ed88e2eae 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswell/memory.json @@ -298,7 +298,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Loads with latency value being above 4.", + "BriefDescription": "Randomly selected loads with latency value being above 4.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -312,7 +312,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "SampleAfterValue": "50021", - "BriefDescription": "Loads with latency value being above 8.", + "BriefDescription": "Randomly selected loads with latency value being above 8.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -326,7 +326,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "SampleAfterValue": "20011", - "BriefDescription": "Loads with latency value being above 16.", + "BriefDescription": "Randomly selected loads with latency value being above 16.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -340,7 +340,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "SampleAfterValue": "100003", - "BriefDescription": "Loads with latency value being above 32.", + "BriefDescription": "Randomly selected loads with latency value being above 32.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -354,7 +354,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "SampleAfterValue": "2003", - "BriefDescription": "Loads with latency value being above 64.", + "BriefDescription": "Randomly selected loads with latency value being above 64.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -368,7 +368,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "SampleAfterValue": "1009", - "BriefDescription": "Loads with latency value being above 128.", + "BriefDescription": "Randomly selected loads with latency value being above 128.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -382,7 +382,7 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "SampleAfterValue": "503", - "BriefDescription": "Loads with latency value being above 256.", + "BriefDescription": "Randomly selected loads with latency value being above 256.", "TakenAlone": "1", "CounterHTOff": "3" }, @@ -396,280 +396,280 @@ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", 
"MSRIndex": "0x3F6", "SampleAfterValue": "101", - "BriefDescription": "Loads with latency value being above 512.", + "BriefDescription": "Randomly selected loads with latency value being above 512.", "TakenAlone": "1", "CounterHTOff": "3" }, { - "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all requests miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc08fff", + "MSRValue": "0x3FFFC08FFF", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all requests that miss in the L3", + "BriefDescription": "Counts all requests miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01004007f7", + "MSRValue": "0x01004007F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram", + "BriefDescription": "miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc007f7", + "MSRValue": "0x3FFFC007F7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3", + "BriefDescription": "miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400244", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - 
"BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch code reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00244", + "MSRValue": "0x3FFFC00244", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3", + "BriefDescription": "Counts all demand & prefetch code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch RFOs miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00122", + "MSRValue": "0x3FFFC00122", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3", + "BriefDescription": "Counts all demand & prefetch RFOs miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400091", "Counter": "0,1,2,3", "UMask": "0x1", 
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand & prefetch data reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00091", + "MSRValue": "0x3FFFC00091", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3", + "BriefDescription": "Counts all demand & prefetch data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00200", + "MSRValue": "0x3FFFC00200", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3", + "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00100", + "MSRValue": "0x3FFFC00100", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00080", + "MSRValue": "0x3FFFC00080", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00040", + "MSRValue": "0x3FFFC00040", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00020", + "MSRValue": "0x3FFFC00020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00010", + "MSRValue": "0x3FFFC00010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local 
dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand code reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00004", + "MSRValue": "0x3FFFC00004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss in the L3", + "BriefDescription": "Counts all demand code reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts all demand data writes (RFOs) miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00002", + "MSRValue": "0x3FFFC00002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", + "BriefDescription": "Counts all demand data writes (RFOs) miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - 
"PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram", + "BriefDescription": "Counts demand data reads miss the L3 and the data is returned from local dram", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads miss in the L3", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00001", + "MSRValue": "0x3FFFC00001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", + "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss in the L3", + "BriefDescription": "Counts demand data reads miss in the L3", "Offcore": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json index a4dcfce4a512f..734d3873729e8 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "Errata": "HSD140, HSD143", @@ -12,7 +11,6 @@ }, { "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", @@ -21,7 +19,6 @@ "CounterHTOff": "Fixed counter 1" }, { - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "AnyThread": "1", @@ -32,7 +29,6 @@ }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. 
This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -1071,7 +1067,8 @@ "CounterHTOff": "1" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", "EventCode": "0xC0", "Counter": "0,1,2,3", "UMask": "0x2", @@ -1081,13 +1078,13 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1102,28 +1099,34 @@ "Data_LA": "1" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles no executable uops retired", "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Number of cycles using always true condition applied to PEBS uops retired event.", "CounterMask": "10", "CounterHTOff": "0,1,2,3" }, { + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", @@ -1131,7 +1134,7 @@ "AnyThread": "1", "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles no executable uops retired on core", "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, @@ -1245,13 +1248,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of not taken branch instructions retired.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "BriefDescription": "Counts all not taken macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1265,13 +1269,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of far branches retired.", + "PEBS": "1", + "PublicDescription": "", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100003", - "BriefDescription": "Far 
branch instructions retired.", + "BriefDescription": "Counts the number of far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { -- GitLab From 643e72255e35f49ac1a2e1ca409a8ccd5b4598b6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 08:43:04 -0700 Subject: [PATCH 0445/1861] perf vendor events intel: Update KnightsLanding events to v9 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/knightslanding/cache.json | 666 +++++++++--------- .../arch/x86/knightslanding/memory.json | 268 +++---- .../arch/x86/knightslanding/pipeline.json | 15 +- .../x86/knightslanding/virtual-memory.json | 2 +- 4 files changed, 474 insertions(+), 477 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/cache.json b/tools/perf/pmu-events/arch/x86/knightslanding/cache.json index e434ec7230018..e847b0fd696df 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/cache.json +++ b/tools/perf/pmu-events/arch/x86/knightslanding/cache.json @@ -32,16 +32,16 @@ "BriefDescription": "Counts the number of L2 cache misses" }, { - "PublicDescription": "This event counts the number of core cycles the fetch stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses. ", + "PublicDescription": "This event counts the number of core cycles the fetch stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses.", "EventCode": "0x86", "Counter": "0,1", "UMask": "0x4", "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of core cycles the fetch stalls because of an icache miss. This is a cummulative count of core cycles the fetch stalled for all icache misses. " + "BriefDescription": "Counts the number of core cycles the fetch stalls because of an icache miss. This is a cummulative count of core cycles the fetch stalled for all icache misses." }, { - "PublicDescription": "This event counts the number of load micro-ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted. ", + "PublicDescription": "This event counts the number of load micro-ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted.", "EventCode": "0x04", "Counter": "0,1", "UMask": "0x1", @@ -115,29 +115,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000070 ", + "MSRValue": "0x4000000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400070 ", + "MSRValue": "0x1000400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400070 ", + "MSRValue": "0x0800400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_E_F", @@ -148,29 +148,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080070 ", + "MSRValue": "0x1000080070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080070 ", + "MSRValue": "0x0800080070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010070 ", + "MSRValue": "0x0000010070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.ANY_RESPONSE", @@ -181,29 +181,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x40000032f7 ", + "MSRValue": "0x40000032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x10004032f7 ", + "MSRValue": "0x10004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x08004032f7 ", + "MSRValue": "0x08004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_E_F", @@ -214,29 +214,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x10000832f7 ", + "MSRValue": "0x10000832f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x08000832f7 ", + "MSRValue": "0x08000832f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00000132f7 ", + "MSRValue": "0x00000132f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", @@ -247,29 +247,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000044 ", + "MSRValue": "0x4000000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400044 ", + "MSRValue": "0x1000400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400044 ", + "MSRValue": "0x0800400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_E_F", @@ -280,29 +280,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080044 ", + "MSRValue": "0x1000080044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080044 ", + "MSRValue": "0x0800080044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010044 ", + "MSRValue": "0x0000010044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.ANY_RESPONSE", @@ -313,29 +313,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000022 ", + "MSRValue": "0x4000000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400022 ", + "MSRValue": "0x1000400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400022 ", + "MSRValue": "0x0800400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_E_F", @@ -346,29 +346,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080022 ", + "MSRValue": "0x1000080022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080022 ", + "MSRValue": "0x0800080022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010022 ", + "MSRValue": "0x0000010022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", @@ -379,29 +379,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000003091 ", + "MSRValue": "0x4000003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000403091 ", + "MSRValue": "0x1000403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800403091 ", + "MSRValue": "0x0800403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_E_F", @@ -412,29 +412,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000083091 ", + "MSRValue": "0x1000083091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800083091 ", + "MSRValue": "0x0800083091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000013091 ", + "MSRValue": "0x0000013091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", @@ -445,29 +445,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000008000 ", + "MSRValue": "0x4000008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000408000 ", + "MSRValue": "0x1000408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800408000 ", + "MSRValue": "0x0800408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_E_F", @@ -478,29 +478,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000088000 ", + "MSRValue": "0x1000088000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800088000 ", + "MSRValue": "0x0800088000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000018000 ", + "MSRValue": "0x0000018000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", @@ -511,7 +511,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000014800 ", + "MSRValue": "0x0000014800", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", @@ -522,7 +522,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000014000 ", + "MSRValue": "0x0000014000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE", @@ -533,29 +533,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000002000 ", + "MSRValue": "0x4000002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000402000 ", + "MSRValue": "0x1000402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800402000 ", + "MSRValue": "0x0800402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_E_F", @@ -566,29 +566,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000082000 ", + "MSRValue": "0x1000082000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800082000 ", + "MSRValue": "0x0800082000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000012000 ", + "MSRValue": "0x0000012000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", @@ -599,29 +599,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000001000 ", + "MSRValue": "0x4000001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000401000 ", + "MSRValue": "0x1000401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800401000 ", + "MSRValue": "0x0800401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_E_F", @@ -632,29 +632,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000081000 ", + "MSRValue": "0x1000081000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800081000 ", + "MSRValue": "0x0800081000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000011000 ", + "MSRValue": "0x0000011000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.ANY_RESPONSE", @@ -665,7 +665,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000010800 ", + "MSRValue": "0x0000010800", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", @@ -676,29 +676,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000400 ", + "MSRValue": "0x4000000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400400 ", + "MSRValue": "0x1000400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400400 ", + "MSRValue": "0x0800400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_E_F", @@ -709,29 +709,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080400 ", + "MSRValue": "0x1000080400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080400 ", + "MSRValue": "0x0800080400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010400 ", + "MSRValue": "0x0000010400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", @@ -742,29 +742,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000200 ", + "MSRValue": "0x4000000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400200 ", + "MSRValue": "0x1000400200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 
", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400200 ", + "MSRValue": "0x0800400200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_E_F", @@ -775,29 +775,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080200 ", + "MSRValue": "0x1000080200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080200 ", + "MSRValue": "0x0800080200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010200 ", + "MSRValue": "0x0000010200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.ANY_RESPONSE", @@ -808,18 +808,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000400100 ", + "MSRValue": "0x1000400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400100 ", + "MSRValue": "0x0800400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_E_F", @@ -830,29 +830,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080100 ", + "MSRValue": "0x1000080100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080100 ", + "MSRValue": "0x0800080100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010100 ", + "MSRValue": "0x0000010100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE", @@ -863,29 +863,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000080 ", + "MSRValue": "0x4000000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400080 ", + "MSRValue": "0x1000400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400080 ", + "MSRValue": "0x0800400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_E_F", @@ -896,29 +896,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080080 ", + "MSRValue": "0x1000080080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080080 ", + "MSRValue": "0x0800080080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010080 ", + "MSRValue": "0x0000010080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE", @@ -929,29 +929,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000040 ", + "MSRValue": "0x4000000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400040 ", + "MSRValue": "0x1000400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400040 ", + "MSRValue": "0x0800400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_E_F", @@ -962,29 +962,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080040 ", + "MSRValue": "0x1000080040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 
", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080040 ", + "MSRValue": "0x0800080040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010040 ", + "MSRValue": "0x0000010040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE", @@ -995,18 +995,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000400020 ", + "MSRValue": "0x1000400020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400020 ", + "MSRValue": "0x0800400020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_E_F", @@ -1017,29 +1017,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080020 ", + "MSRValue": "0x1000080020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080020 ", + "MSRValue": "0x0800080020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000020020 ", + "MSRValue": "0x0000020020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE", @@ -1050,7 +1050,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0000010020 ", + "MSRValue": "0x0000010020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", @@ -1061,29 +1061,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000004 ", + "MSRValue": "0x4000000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400004 ", + "MSRValue": "0x1000400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400004 ", + "MSRValue": "0x0800400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_E_F", @@ -1094,29 +1094,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080004 ", + "MSRValue": "0x1000080004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080004 ", + "MSRValue": "0x0800080004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010004 ", + "MSRValue": "0x0000010004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", @@ -1127,29 +1127,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000002 ", + "MSRValue": "0x4000000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400002 ", + "MSRValue": "0x1000400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400002 ", + "MSRValue": "0x0800400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_E_F", @@ -1160,29 +1160,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080002 ", + "MSRValue": "0x1000080002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080002 ", + "MSRValue": "0x0800080002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010002 ", + "MSRValue": "0x0000010002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", @@ -1193,29 +1193,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x4000000001 ", + "MSRValue": "0x4000000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The outstanding response should be programmed only on PMC0. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1000400001 ", + "MSRValue": "0x1000400001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800400001 ", + "MSRValue": "0x0800400001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_E_F", @@ -1226,29 +1226,29 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1000080001 ", + "MSRValue": "0x1000080001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0800080001 ", + "MSRValue": "0x0800080001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_E_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 
", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0000010001 ", + "MSRValue": "0x0000010001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", @@ -1259,722 +1259,722 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0002000001 ", + "MSRValue": "0x0002000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000002 ", + "MSRValue": "0x0002000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000004 ", + "MSRValue": "0x0002000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000020 ", + "MSRValue": "0x0002000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000080 ", + "MSRValue": "0x0002000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000100 ", + "MSRValue": "0x0002000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000200 ", + "MSRValue": "0x0002000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000400 ", + "MSRValue": "0x0002000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002001000 ", + "MSRValue": "0x0002001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002002000 ", + "MSRValue": "0x0002002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002008000 ", + "MSRValue": "0x0002008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002003091 ", + "MSRValue": "0x0002003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_M", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000022 ", + "MSRValue": "0x0002000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000044 ", + "MSRValue": "0x0002000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00020032f7 ", + "MSRValue": "0x00020032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0002000070 ", + "MSRValue": "0x0002000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_M", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in M state ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in M state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000001 ", + "MSRValue": "0x0004000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000002 ", + "MSRValue": "0x0004000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand 
cacheable data writes that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000004 ", + "MSRValue": "0x0004000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000020 ", + "MSRValue": "0x0004000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000040 ", + "MSRValue": "0x0004000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000080 ", + "MSRValue": "0x0004000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000100 ", + "MSRValue": "0x0004000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000200 ", + "MSRValue": "0x0004000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000400 ", + "MSRValue": "0x0004000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004001000 ", + "MSRValue": "0x0004001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004002000 ", + "MSRValue": "0x0004002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004008000 ", + "MSRValue": "0x0004008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004003091 ", + "MSRValue": "0x0004003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_E", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000022 ", + "MSRValue": "0x0004000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000044 ", + "MSRValue": "0x0004000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00040032f7 ", + "MSRValue": "0x00040032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0004000070 ", + "MSRValue": "0x0004000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_E", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in E state ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in E state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000001 ", + "MSRValue": "0x0008000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000002 ", + "MSRValue": "0x0008000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand 
cacheable data writes that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000004 ", + "MSRValue": "0x0008000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000020 ", + "MSRValue": "0x0008000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000080 ", + "MSRValue": "0x0008000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000100 ", + "MSRValue": "0x0008000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000200 ", + "MSRValue": "0x0008000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000400 ", + "MSRValue": "0x0008000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008001000 ", + "MSRValue": "0x0008001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008002000 ", + "MSRValue": "0x0008002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008008000 ", + "MSRValue": "0x0008008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008003091 ", + "MSRValue": "0x0008003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_S", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000022 ", + "MSRValue": "0x0008000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0008000044 ", + "MSRValue": "0x0008000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00080032f7 ", + "MSRValue": "0x00080032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_S", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in S state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in S state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000001 ", + "MSRValue": "0x0010000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000002 ", + "MSRValue": "0x0010000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000004 ", + "MSRValue": "0x0010000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in F state ", + 
"BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000020 ", + "MSRValue": "0x0010000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000040 ", + "MSRValue": "0x0010000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000080 ", + "MSRValue": "0x0010000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). 
that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000100 ", + "MSRValue": "0x0010000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000200 ", + "MSRValue": "0x0010000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000400 ", + "MSRValue": "0x0010000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010001000 ", + "MSRValue": "0x0010001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010002000 ", + "MSRValue": "0x0010002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010008000 ", + "MSRValue": "0x0010008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010003091 ", + "MSRValue": "0x0010003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_F", 
"MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000022 ", + "MSRValue": "0x0010000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000044 ", + "MSRValue": "0x0010000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00100032f7 ", + "MSRValue": "0x00100032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0010000070 ", + "MSRValue": "0x0010000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_F", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in F state ", + "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in F state", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x1800180002 ", + "MSRValue": "0x1800180002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE", @@ -1985,7 +1985,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180004 ", + "MSRValue": "0x1800180004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE", @@ -1996,7 +1996,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180020 ", + "MSRValue": "0x1800180020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE", @@ -2007,7 +2007,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180040 ", + "MSRValue": "0x1800180040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE", @@ -2018,7 +2018,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180080 ", + "MSRValue": "0x1800180080", "Counter": "0,1", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE", @@ -2029,7 +2029,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180100 ", + "MSRValue": "0x1800180100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE", @@ -2040,7 +2040,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180200 ", + "MSRValue": "0x1800180200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE", @@ -2051,7 +2051,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180400 ", + "MSRValue": "0x1800180400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE", @@ -2062,7 +2062,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800181000 ", + "MSRValue": "0x1800181000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE", @@ -2073,7 +2073,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800182000 ", + "MSRValue": "0x1800182000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE", @@ -2084,7 +2084,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800188000 ", + "MSRValue": "0x1800188000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE", @@ -2095,7 +2095,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800183091 ", + "MSRValue": "0x1800183091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE", @@ -2106,7 +2106,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180022 ", + "MSRValue": "0x1800180022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE", @@ -2117,7 +2117,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180044 ", + "MSRValue": "0x1800180044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE", @@ -2128,7 +2128,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x18001832f7 ", + "MSRValue": "0x18001832f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE", @@ -2139,7 +2139,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800180070 ", + "MSRValue": "0x1800180070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE", @@ -2150,7 +2150,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400002 ", + "MSRValue": "0x1800400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE", @@ -2161,7 +2161,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400004 ", + "MSRValue": "0x1800400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE", @@ -2172,7 +2172,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400040 ", + "MSRValue": "0x1800400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE", @@ -2183,7 +2183,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400080 ", + "MSRValue": "0x1800400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE", @@ -2194,7 +2194,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400100 ", + "MSRValue": "0x1800400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE", @@ -2205,7 +2205,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400400 ", + "MSRValue": "0x1800400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE", @@ -2216,7 +2216,7 @@ 
}, { "EventCode": "0xB7", - "MSRValue": "0x1800401000 ", + "MSRValue": "0x1800401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE", @@ -2227,7 +2227,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800402000 ", + "MSRValue": "0x1800402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE", @@ -2238,7 +2238,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800408000 ", + "MSRValue": "0x1800408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE", @@ -2249,7 +2249,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800403091 ", + "MSRValue": "0x1800403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE", @@ -2260,7 +2260,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400022 ", + "MSRValue": "0x1800400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE", @@ -2271,7 +2271,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400044 ", + "MSRValue": "0x1800400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE", @@ -2282,7 +2282,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x18004032f7 ", + "MSRValue": "0x18004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE", @@ -2293,7 +2293,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x1800400070 ", + "MSRValue": "0x1800400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE", diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/memory.json index 7006525662007..c6bb16ba0f865 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/memory.json +++ b/tools/perf/pmu-events/arch/x86/knightslanding/memory.json @@ -9,18 +9,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400070 ", + "MSRValue": "0x0100400070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200070 ", + "MSRValue": "0x0080200070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_NEAR", @@ -31,18 +31,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000070 ", + "MSRValue": "0x0101000070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800070 ", + "MSRValue": "0x0080800070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_NEAR", @@ -53,18 +53,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01004032f7 ", + "MSRValue": "0x01004032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts any Read request that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00802032f7 ", + "MSRValue": "0x00802032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_NEAR", @@ -75,18 +75,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01010032f7 ", + "MSRValue": "0x01010032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any Read request that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts any Read request that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x00808032f7 ", + "MSRValue": "0x00808032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_NEAR", @@ -97,18 +97,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400044 ", + "MSRValue": "0x0100400044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200044 ", + "MSRValue": "0x0080200044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_NEAR", @@ -119,18 +119,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000044 ", + "MSRValue": "0x0101000044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800044 ", + "MSRValue": "0x0080800044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_NEAR", @@ -141,18 +141,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400022 ", + "MSRValue": "0x0100400022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. 
", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200022 ", + "MSRValue": "0x0080200022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_NEAR", @@ -163,18 +163,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000022 ", + "MSRValue": "0x0101000022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800022 ", + "MSRValue": "0x0080800022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_NEAR", @@ -185,18 +185,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100403091 ", + "MSRValue": "0x0100403091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080203091 ", + "MSRValue": "0x0080203091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_NEAR", @@ -207,18 +207,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101003091 ", + "MSRValue": "0x0101003091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080803091 ", + "MSRValue": "0x0080803091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_NEAR", @@ -229,18 +229,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100408000 ", + "MSRValue": "0x0100408000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080208000 ", + "MSRValue": "0x0080208000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_NEAR", @@ -251,18 +251,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101008000 ", + "MSRValue": "0x0101008000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any request that accounts for data responses from DRAM Far. 
", + "BriefDescription": "Counts any request that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080808000 ", + "MSRValue": "0x0080808000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_NEAR", @@ -273,18 +273,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100402000 ", + "MSRValue": "0x0100402000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080202000 ", + "MSRValue": "0x0080202000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_NEAR", @@ -295,18 +295,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101002000 ", + "MSRValue": "0x0101002000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080802000 ", + "MSRValue": "0x0080802000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_NEAR", @@ -317,18 +317,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100401000 ", + "MSRValue": "0x0100401000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080201000 ", + "MSRValue": "0x0080201000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_NEAR", @@ -339,18 +339,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101001000 ", + "MSRValue": "0x0101001000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080801000 ", + "MSRValue": "0x0080801000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_NEAR", @@ -361,18 +361,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400400 ", + "MSRValue": "0x0100400400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. 
", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200400 ", + "MSRValue": "0x0080200400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_NEAR", @@ -383,18 +383,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000400 ", + "MSRValue": "0x0101000400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800400 ", + "MSRValue": "0x0080800400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_NEAR", @@ -405,18 +405,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400200 ", + "MSRValue": "0x0100400200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200200 ", + "MSRValue": "0x0080200200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_NEAR", @@ -427,18 +427,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000200 ", + "MSRValue": "0x0101000200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800200 ", + "MSRValue": "0x0080800200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_NEAR", @@ -449,18 +449,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400100 ", + "MSRValue": "0x0100400100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_FAR", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. 
", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200100 ", + "MSRValue": "0x0080200100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_NEAR", @@ -471,18 +471,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000100 ", + "MSRValue": "0x0101000100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_FAR", "MSRIndex": "0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800100 ", + "MSRValue": "0x0080800100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_NEAR", @@ -493,7 +493,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x2000020080 ", + "MSRValue": "0x2000020080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.NON_DRAM", @@ -504,18 +504,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400080 ", + "MSRValue": "0x0100400080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200080 ", + "MSRValue": "0x0080200080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_NEAR", @@ -526,18 +526,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000080 ", + "MSRValue": "0x0101000080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800080 ", + "MSRValue": "0x0080800080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_NEAR", @@ -548,18 +548,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400040 ", + "MSRValue": "0x0100400040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. 
", + "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200040 ", + "MSRValue": "0x0080200040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_NEAR", @@ -570,18 +570,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000040 ", + "MSRValue": "0x0101000040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800040 ", + "MSRValue": "0x0080800040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_NEAR", @@ -592,7 +592,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x2000020020 ", + "MSRValue": "0x2000020020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.NON_DRAM", @@ -603,18 +603,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400020 ", + "MSRValue": "0x0100400020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200020 ", + "MSRValue": "0x0080200020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_NEAR", @@ -625,18 +625,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000020 ", + "MSRValue": "0x0101000020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800020 ", + "MSRValue": "0x0080800020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_NEAR", @@ -647,18 +647,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400004 ", + "MSRValue": "0x0100400004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. 
", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200004 ", + "MSRValue": "0x0080200004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_NEAR", @@ -669,18 +669,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000004 ", + "MSRValue": "0x0101000004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800004 ", + "MSRValue": "0x0080800004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_NEAR", @@ -691,18 +691,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400002 ", + "MSRValue": "0x0100400002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200002 ", + "MSRValue": "0x0080200002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_NEAR", @@ -713,18 +713,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000002 ", + "MSRValue": "0x0101000002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800002 ", + "MSRValue": "0x0080800002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_NEAR", @@ -735,18 +735,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0100400001 ", + "MSRValue": "0x0100400001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. 
", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080200001 ", + "MSRValue": "0x0080200001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_NEAR", @@ -757,18 +757,18 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0101000001 ", + "MSRValue": "0x0101000001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_FAR", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Far. ", + "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Far.", "Offcore": "1" }, { "EventCode": "0xB7", - "MSRValue": "0x0080800001 ", + "MSRValue": "0x0080800001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_NEAR", @@ -779,7 +779,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600001 ", + "MSRValue": "0x0180600001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM", @@ -790,7 +790,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600002 ", + "MSRValue": "0x0180600002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM", @@ -801,7 +801,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600004 ", + "MSRValue": "0x0180600004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM", @@ -812,7 +812,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600020 ", + "MSRValue": "0x0180600020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM", @@ -823,7 +823,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600080 ", + "MSRValue": "0x0180600080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM", @@ -834,7 +834,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600100 ", + "MSRValue": "0x0180600100", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM", @@ -845,7 +845,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600200 ", + "MSRValue": "0x0180600200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM", @@ -856,7 +856,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600400 ", + "MSRValue": "0x0180600400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM", @@ -867,7 +867,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180601000 ", + "MSRValue": "0x0180601000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM", @@ -878,7 +878,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180608000 ", + "MSRValue": "0x0180608000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM", @@ -889,7 +889,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180603091 ", + "MSRValue": "0x0180603091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM", @@ -900,7 +900,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600022 ", + "MSRValue": "0x0180600022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM", @@ -911,7 +911,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600044 ", + "MSRValue": "0x0180600044", "Counter": "0,1", "UMask": "0x1", "EventName": 
"OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM", @@ -922,7 +922,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01806032f7 ", + "MSRValue": "0x01806032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM", @@ -933,7 +933,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0180600070 ", + "MSRValue": "0x0180600070", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM", @@ -944,7 +944,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800001 ", + "MSRValue": "0x0181800001", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR", @@ -955,7 +955,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800002 ", + "MSRValue": "0x0181800002", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR", @@ -966,7 +966,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800004 ", + "MSRValue": "0x0181800004", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR", @@ -977,7 +977,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800020 ", + "MSRValue": "0x0181800020", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR", @@ -988,7 +988,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800040 ", + "MSRValue": "0x0181800040", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR", @@ -999,7 +999,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800080 ", + "MSRValue": "0x0181800080", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR", @@ -1010,7 +1010,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800200 ", + "MSRValue": "0x0181800200", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR", @@ -1021,7 +1021,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800400 ", + "MSRValue": "0x0181800400", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR", @@ -1032,7 +1032,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181801000 ", + "MSRValue": "0x0181801000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR", @@ -1043,7 +1043,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181802000 ", + "MSRValue": "0x0181802000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR", @@ -1054,7 +1054,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181808000 ", + "MSRValue": "0x0181808000", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR", @@ -1065,7 +1065,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181803091 ", + "MSRValue": "0x0181803091", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR", @@ -1076,7 +1076,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800022 ", + "MSRValue": "0x0181800022", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR", @@ -1087,7 +1087,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x0181800044 ", + "MSRValue": "0x0181800044", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR", @@ -1098,7 +1098,7 @@ }, { "EventCode": "0xB7", - "MSRValue": "0x01818032f7 ", + "MSRValue": "0x01818032f7", "Counter": "0,1", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR", diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json b/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json index bb5494cfb5aed..92e4ef2e22c62 100644 --- a/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json +++ 
b/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json
@@ -144,7 +144,7 @@
         "BriefDescription": "Counts the number of micro-ops retired that are from the complex flows issued by the micro-sequencer (MS)."
     },
     {
-        "PublicDescription": "This event counts the number of micro-ops (uops) retired. The processor decodes complex macro instructions into a sequence of simpler uops. Most instructions are composed of one or two uops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. ",
+        "PublicDescription": "This event counts the number of micro-ops (uops) retired. The processor decodes complex macro instructions into a sequence of simpler uops. Most instructions are composed of one or two uops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists.",
         "EventCode": "0xC2",
         "Counter": "0,1",
         "UMask": "0x10",
@@ -218,7 +218,7 @@
         "UMask": "0x20",
         "EventName": "NO_ALLOC_CYCLES.RAT_STALL",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted. "
+        "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted."
     },
     {
         "PublicDescription": "This event counts the number of core cycles when no uops are allocated, the instruction queue is empty and the alloc pipe is stalled waiting for instructions to be fetched.",
@@ -251,7 +251,7 @@
         "UMask": "0x1f",
         "EventName": "RS_FULL_STALL.ALL",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full. "
+        "BriefDescription": "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full."
     },
     {
         "EventCode": "0xC0",
@@ -268,11 +268,10 @@
         "UMask": "0x1",
         "EventName": "CYCLES_DIV_BUSY.ALL",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles the number of core cycles when divider is busy. Does not imply a stall waiting for the divider. "
+        "BriefDescription": "Cycles the number of core cycles when divider is busy. Does not imply a stall waiting for the divider."
     },
     {
         "PublicDescription": "This event counts the number of instructions that retire. For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires. The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps.",
-        "EventCode": "0x00",
         "Counter": "Fixed counter 1",
         "UMask": "0x1",
         "EventName": "INST_RETIRED.ANY",
@@ -296,8 +295,7 @@
         "BriefDescription": "Counts the number of unhalted reference clock cycles"
     },
     {
-        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter\r\n",
-        "EventCode": "0x00",
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter",
         "Counter": "Fixed counter 2",
         "UMask": "0x2",
         "EventName": "CPU_CLK_UNHALTED.THREAD",
@@ -305,7 +303,6 @@
         "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles"
     },
     {
-        "EventCode": "0x00",
         "Counter": "Fixed counter 3",
         "UMask": "0x3",
         "EventName": "CPU_CLK_UNHALTED.REF_TSC",
@@ -343,7 +340,7 @@
         "UMask": "0x1",
         "EventName": "RECYCLEQ.LD_BLOCK_ST_FORWARD",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of occurences a retired load gets blocked because its address partially overlaps with a store ",
+        "BriefDescription": "Counts the number of occurences a retired load gets blocked because its address partially overlaps with a store",
         "Data_LA": "1"
     },
     {
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json
index f31594507f8c6..9e493977771f1 100644
--- a/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json
@@ -36,7 +36,7 @@
         "EdgeDetect": "1"
     },
     {
-        "PublicDescription": "This event counts every cycle when an I-side (walks due to an instruction fetch) page walk is in progress. ",
+        "PublicDescription": "This event counts every cycle when an I-side (walks due to an instruction fetch) page walk is in progress.",
         "EventCode": "0x05",
         "Counter": "0,1",
         "UMask": "0x2",
-- GitLab

From b1580f542ca7d4bec387559b05e62b1b2f5f08d2 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 14 Mar 2019 14:55:07 -0700
Subject: [PATCH 0446/1861] perf vendor events intel: Update Bonnell to V4

Signed-off-by: Andi Kleen
Cc: Kan Liang
Cc: Jiri Olsa
Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/pmu-events/arch/x86/bonnell/frontend.json | 2 +-
 tools/perf/pmu-events/arch/x86/bonnell/pipeline.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
index 935b7dcf067d2..ef69540ab61db 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
@@ -77,7 +77,7 @@
         "UMask": "0x1",
         "EventName": "UOPS.MS_CYCLES",
         "SampleAfterValue": "2000000",
-        "BriefDescription": "This event counts the cycles where 1 or more uops are issued by the micro-sequencer (MS), including microcode assists and inserted flows, and written to the IQ. ",
+        "BriefDescription": "This event counts the cycles where 1 or more uops are issued by the micro-sequencer (MS), including microcode assists and inserted flows, and written to the IQ.",
         "CounterMask": "1"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
index b2e681c78466b..09c6de13de20d 100644
--- a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
@@ -189,7 +189,7 @@
         "UMask": "0x8",
         "EventName": "BR_MISSP_TYPE_RETIRED.IND_CALL",
         "SampleAfterValue": "200000",
-        "BriefDescription": "Mispredicted indirect calls, including both register and memory indirect. "
+        "BriefDescription": "Mispredicted indirect calls, including both register and memory indirect."
     },
     {
         "EventCode": "0x89",
-- GitLab

From f3ef08583ea61eb4d86dfb33f0e0ebe2bc4f6e64 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 14 Mar 2019 14:55:34 -0700
Subject: [PATCH 0447/1861] perf vendor events intel: Update Goldmont to v13

Signed-off-by: Andi Kleen
Cc: Kan Liang
Cc: Jiri Olsa
Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 .../pmu-events/arch/x86/goldmont/cache.json   | 1244 ++++-------------
 .../pmu-events/arch/x86/goldmont/memory.json  |  260 ----
 .../arch/x86/goldmont/pipeline.json           |    5 +-
 .../arch/x86/goldmont/virtual-memory.json     |    9 +-
 4 files changed, 259 insertions(+), 1259 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/goldmont/cache.json b/tools/perf/pmu-events/arch/x86/goldmont/cache.json
index f8bbe087b0f8a..52a105666afcb 100644
--- a/tools/perf/pmu-events/arch/x86/goldmont/cache.json
+++ b/tools/perf/pmu-events/arch/x86/goldmont/cache.json
@@ -77,7 +78,8 @@
         "UMask": "0x21",
         "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Locked load uops retired (Precise event capable)"
+        "BriefDescription": "Locked load uops retired (Precise event capable)",
+        "Data_LA": "1"
     },
     {
         "PEBS": "2",
@@ -88,7 +89,8 @@
         "UMask": "0x41",
         "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)"
+        "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)",
+        "Data_LA": "1"
     },
     {
         "PEBS": "2",
@@ -99,7 +101,8 @@
         "UMask": "0x42",
         "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)"
+        "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)",
+        "Data_LA": "1"
     },
     {
         "PEBS": "2",
@@ -110,7 +113,8 @@
         "UMask": "0x43",
         "EventName": "MEM_UOPS_RETIRED.SPLIT",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)"
+        "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)",
+        "Data_LA": "1"
     },
     {
         "PEBS": "2",
@@ -121,7 +125,8 @@
         "UMask": "0x81",
         "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Load uops retired (Precise event capable)"
+        "BriefDescription": "Load uops retired (Precise event capable)",
+        "Data_LA": "1"
     },
     {
         "PEBS": "2",
@@ -132,7 +137,8 @@
         "UMask": "0x82",
         "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Store uops retired (Precise event capable)"
+        "BriefDescription": "Store uops 
retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -143,7 +149,8 @@ "UMask": "0x83", "EventName": "MEM_UOPS_RETIRED.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired (Precise event capable)" + "BriefDescription": "Memory uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -154,7 +161,8 @@ "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -165,7 +173,8 @@ "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -176,7 +185,8 @@ "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -187,7 +197,8 @@ "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L2 (Precise event capable)" + "BriefDescription": "Load uops retired that missed L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -198,7 +209,8 @@ "UMask": "0x20", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", "SampleAfterValue": "200003", - "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)" + "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -209,7 +221,8 @@ "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that hit WCB (Precise event capable)" + "BriefDescription": "Loads retired that hit WCB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -220,26 +233,14 @@ "UMask": "0x80", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that came from DRAM (Precise event capable)" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x40000032b7 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" + "BriefDescription": "Loads retired that came from DRAM (Precise event capable)", + "Data_LA": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x36000032b7 ", + "MSRValue": "0x36000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.ANY", @@ -252,7 +253,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x10000032b7 ", + "MSRValue": "0x10000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE", @@ -265,7 +266,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x04000032b7 ", + "MSRValue": "0x04000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -278,20 +279,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x02000032b7 ", + "MSRValue": "0x02000032b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x00000432b7 ", + "MSRValue": "0x00000432b7", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT", @@ -300,37 +301,11 @@ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x00000132b7 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000022 ", + "MSRValue": "0x3600000022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY", @@ -343,7 +318,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000022 ", + "MSRValue": "0x1000000022", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE", @@ -356,1164 +331,566 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200000022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000040022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000043091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000013091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000043010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000013010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000008000 ", + "MSRValue": "0x0400000022", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000018000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000044800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000014800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000044000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000014000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x3600002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0400002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0200002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. 
", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000042000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000012000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000001000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600001000 ", + "MSRValue": "0x0200000022", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000001000 ", + "MSRValue": "0x0000040022", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400001000 ", + "MSRValue": "0x3600003091", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200001000 ", + "MSRValue": "0x1000003091", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000041000 ", + "MSRValue": "0x0400003091", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.", + "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000011000 ", + "MSRValue": "0x0200003091", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000800 ", + "MSRValue": "0x0000043091", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts data reads (demand & prefetch) that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000800 ", + "MSRValue": "0x3600003010", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000800 ", + "MSRValue": "0x1000003010", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000800 ", + "MSRValue": "0x0400003010", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000800 ", + "MSRValue": "0x0200003010", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040800 ", + "MSRValue": "0x0000043010", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010800 ", + "MSRValue": "0x1000008000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000400 ", + "MSRValue": "0x0400008000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000400 ", + "MSRValue": "0x0200008000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache.", + "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000400 ", + "MSRValue": "0x0000048000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000400 ", + "MSRValue": "0x0000018000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000400 ", + "MSRValue": "0x3600004800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040400 ", + "MSRValue": "0x0000044800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that hit the L2 cache.", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010400 ", + "MSRValue": "0x3600004000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000200 ", + "MSRValue": "0x1000004000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000200 ", + "MSRValue": "0x0400004000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache.", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000200 ", + "MSRValue": "0x0200004000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000200 ", + "MSRValue": "0x0000044000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000200 ", + "MSRValue": "0x3600002000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040200 ", + "MSRValue": "0x1000002000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache.", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010200 ", + "MSRValue": "0x0400002000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000100 ", + "MSRValue": "0x0200002000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000100 ", + "MSRValue": "0x0000042000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000100 ", + "MSRValue": "0x3600001000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000100 ", + "MSRValue": "0x1000001000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000100 ", + "MSRValue": "0x0400001000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040100 ", + "MSRValue": "0x0200001000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache.", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010100 ", + "MSRValue": "0x0000041000", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000080 ", + "MSRValue": "0x3600000800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000080 ", + "MSRValue": "0x1000000800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000080 ", + "MSRValue": "0x0400000800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000080 ", + "MSRValue": "0x0200000800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000080 ", + "MSRValue": "0x0000040800", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040080 ", + "MSRValue": "0x0000010400", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010080 ", + "MSRValue": "0x3600000100", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000020 ", + "MSRValue": "0x3600000080", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING", - "MSRIndex": "0x1a6", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000020 ", + "MSRValue": "0x3600000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY", @@ -1526,7 +903,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000020 ", + "MSRValue": "0x1000000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE", @@ -1539,7 +916,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000020 ", + "MSRValue": "0x0400000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1552,20 +929,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000020 ", + "MSRValue": "0x0200000020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040020 ", + "MSRValue": "0x0000040020", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT", @@ -1574,37 +951,11 @@ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010020 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000010 ", + "MSRValue": "0x3600000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY", @@ -1617,7 +968,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000010 ", + "MSRValue": "0x1000000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE", @@ -1630,7 +981,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000010 ", + "MSRValue": "0x0400000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1643,20 +994,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000010 ", + "MSRValue": "0x0200000010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040010 ", + "MSRValue": "0x0000040010", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT", @@ -1665,37 +1016,11 @@ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x4000000008 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000008 ", + "MSRValue": "0x3600000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.ANY", @@ -1708,7 +1033,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000008 ", + "MSRValue": "0x1000000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE", @@ -1721,7 +1046,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000008 ", + "MSRValue": "0x0400000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1734,20 +1059,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000008 ", + "MSRValue": "0x0200000008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040008 ", + "MSRValue": "0x0000040008", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT", @@ -1756,24 +1081,11 @@ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010008 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000004 ", + "MSRValue": "0x4000000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING", @@ -1786,7 +1098,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000004 ", + "MSRValue": "0x3600000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY", @@ -1795,24 +1107,11 @@ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x1000000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000004 ", + "MSRValue": "0x0400000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1825,20 +1124,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000004 ", + "MSRValue": "0x0200000004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040004 ", + "MSRValue": "0x0000040004", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT", @@ -1847,24 +1146,11 @@ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000002 ", + "MSRValue": "0x4000000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING", @@ -1877,7 +1163,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000002 ", + "MSRValue": "0x3600000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY", @@ -1890,7 +1176,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000002 ", + "MSRValue": "0x1000000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE", @@ -1903,7 +1189,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000002 ", + "MSRValue": "0x0400000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -1916,20 +1202,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000002 ", + "MSRValue": "0x0200000002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040002 ", + "MSRValue": "0x0000040002", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT", @@ -1938,24 +1224,11 @@ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache.", "Offcore": "1" }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem.", - "Offcore": "1" - }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x4000000001 ", + "MSRValue": "0x4000000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING", @@ -1968,7 +1241,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000001 ", + "MSRValue": "0x3600000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY", @@ -1981,7 +1254,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000001 ", + "MSRValue": "0x1000000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE", @@ -1994,7 +1267,7 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000001 ", + "MSRValue": "0x0400000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", @@ -2007,20 +1280,20 @@ "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000001 ", + "MSRValue": "0x0200000001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040001 ", + "MSRValue": "0x0000040001", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT", @@ -2028,18 +1301,5 @@ "SampleAfterValue": "100007", "BriefDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache.", "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x0000010001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem.", - "Offcore": "1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/memory.json b/tools/perf/pmu-events/arch/x86/goldmont/memory.json index 690cebd12a94b..197dc76d49ddc 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/memory.json @@ -30,265 +30,5 @@ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", "SampleAfterValue": "200003", "BriefDescription": "Machine clears due to memory ordering issue" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x20000032b7 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000022 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000003091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000003010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000004800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000004000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000002000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000001000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000400 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000200 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000020 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000010 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000008 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6", - "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", - "EventCode": "0xB7", - "MSRValue": "0x2000000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address.", - "Offcore": "1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json index 254788af8ab67..6342368accf8a 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json @@ -1,7 +1,6 @@ [ { "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -10,7 +9,6 @@ }, { "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.CORE", @@ -19,7 +17,6 @@ }, { "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", @@ -188,7 +185,7 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. 
Self-modifying code (SMC) causes a severe penalty in all Intel\u00ae architecture processors.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json index 9805198d3f5f1..343d66bbd7770 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json @@ -48,7 +48,8 @@ "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -59,7 +60,8 @@ "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -70,6 +72,7 @@ "UMask": "0x13", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" } ] \ No newline at end of file -- GitLab From c53dd58988385c3f0861ea1d487489bad8cc69a7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 14:55:53 -0700 Subject: [PATCH 0448/1861] perf vendor events intel: Update GoldmontPlus to v1.01 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/goldmontplus/cache.json | 74 +++++++++++-------- .../arch/x86/goldmontplus/pipeline.json | 5 +- .../arch/x86/goldmontplus/virtual-memory.json | 9 ++- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json index b4791b443a667..5a6ac8285ad4b 100644 --- a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json @@ -92,7 +92,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Locked load uops retired (Precise event capable)" + "BriefDescription": "Locked load uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -104,7 +105,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -116,7 +118,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -128,7 +131,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" + 
"BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -140,7 +144,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired (Precise event capable)" + "BriefDescription": "Load uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -152,7 +157,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired (Precise event capable)" + "BriefDescription": "Store uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -164,7 +170,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired (Precise event capable)" + "BriefDescription": "Memory uops retired (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -176,7 +183,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -188,7 +196,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -200,7 +209,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -212,7 +222,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L2 (Precise event capable)" + "BriefDescription": "Load uops retired that missed L2 (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -224,7 +235,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HITM", "SampleAfterValue": "200003", - "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)" + "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -236,7 +248,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that hit WCB (Precise event capable)" + "BriefDescription": "Loads retired that hit WCB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -248,7 +261,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Loads retired that came from DRAM (Precise event capable)" + "BriefDescription": "Loads retired that came from DRAM (Precise event capable)", + "Data_LA": "1" }, { "CollectPEBSRecord": "1", @@ -292,7 +306,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop 
miss in the other processor module. ", + "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -367,7 +381,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -442,7 +456,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -517,7 +531,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -592,7 +606,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -667,7 +681,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -742,7 +756,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -817,7 +831,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -892,7 +906,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -967,7 +981,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1042,7 +1056,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1117,7 +1131,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1192,7 +1206,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1267,7 +1281,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1342,7 +1356,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { @@ -1417,7 +1431,7 @@ "PDIR_COUNTER": "na", "MSRIndex": "0x1a6, 0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module.", "Offcore": "1" }, { diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json index ccf1aed69197b..e3fa1a0ba71b6 100644 --- a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json @@ -3,7 +3,6 @@ "PEBS": "2", "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 0", "UMask": "0x1", "PEBScounters": "32", @@ -15,7 +14,6 @@ { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x2", "PEBScounters": "33", @@ -27,7 +25,6 @@ { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x3", "PEBScounters": "34", @@ -231,7 +228,7 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. 
Self-modifying code (SMC) causes a severe penalty in all Intel\u00ae architecture processors.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json index 0b53a3b0dfb87..0d32fd26ded14 100644 --- a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json @@ -189,7 +189,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -201,7 +202,8 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" }, { "PEBS": "2", @@ -213,6 +215,7 @@ "PEBScounters": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" + "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)", + "Data_LA": "1" } ] \ No newline at end of file -- GitLab From 1c3a2c864d2da0454bca1e41d3e0090c18678909 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 14:56:26 -0700 Subject: [PATCH 0449/1861] perf vendor events intel: Update Silvermont to v14 Signed-off-by: Andi Kleen Cc: Kan Liang Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20190315165219.GA21223@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/silvermont/cache.json | 2 +- .../pmu-events/arch/x86/silvermont/other.json | 20 +++++++++++++++++++ .../arch/x86/silvermont/pipeline.json | 5 +---- 3 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 tools/perf/pmu-events/arch/x86/silvermont/other.json diff --git a/tools/perf/pmu-events/arch/x86/silvermont/cache.json b/tools/perf/pmu-events/arch/x86/silvermont/cache.json index 82be7d1b8b814..805ef14365399 100644 --- a/tools/perf/pmu-events/arch/x86/silvermont/cache.json +++ b/tools/perf/pmu-events/arch/x86/silvermont/cache.json @@ -36,7 +36,7 @@ "BriefDescription": "L2 cache request misses" }, { - "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events. \r\n", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. 
That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", "EventCode": "0x86", "Counter": "0,1", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/silvermont/other.json b/tools/perf/pmu-events/arch/x86/silvermont/other.json new file mode 100644 index 0000000000000..47814046fa9d4 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/silvermont/other.json @@ -0,0 +1,20 @@ +[ + { + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.", + "EventCode": "0x86", + "Counter": "0,1", + "UMask": "0x2", + "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ITLB miss." + }, + { + "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", + "EventCode": "0x86", + "Counter": "0,1", + "UMask": "0x3f", + "EventName": "FETCH_STALL.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles code-fetch stalled due to any reason." + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json b/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json index 7468af99190ad..1ed62ad4cf778 100644 --- a/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json @@ -210,7 +210,7 @@ "UMask": "0x4", "EventName": "NO_ALLOC_CYCLES.MISPREDICTS", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted " + "BriefDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted" }, { "EventCode": "0xCA", @@ -275,7 +275,6 @@ }, { "PublicDescription": "This event counts the number of instructions that retire. For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires. The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps. Background: Modern microprocessors employ extensive pipelining and speculative techniques. Since sometimes an instruction is started but never completed, the notion of \"retirement\" is introduced. A retired instruction is one that commits its states. Or stated differently, an instruction might be abandoned at some point. No instruction is truly finished until it retires. This counter measures the number of completed instructions. 
The fixed event is INST_RETIRED.ANY and the programmable event is INST_RETIRED.ANY_P.", - "EventCode": "0x00", "Counter": "Fixed counter 1", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", @@ -284,7 +283,6 @@ }, { "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. In systems with a constant core frequency, this event can give you a measurement of the elapsed time while the core was not in halt state by dividing the event count by the core frequency. This event is architecturally defined and is a designated fixed counter. CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time. CPU_CLK_UNHALTE.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.", - "EventCode": "0x00", "Counter": "Fixed counter 2", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.CORE", @@ -293,7 +291,6 @@ }, { "PublicDescription": "Counts the number of reference cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. Divide this event count by core frequency to determine the elapsed time while the core was not in halt state. Divide this event count by core frequency to determine the elapsed time while the core was not in halt state. This event is architecturally defined and is a designated fixed counter. CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time. CPU_CLK_UNHALTE.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.", - "EventCode": "0x00", "Counter": "Fixed counter 3", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", -- GitLab From 44ddd4f1709249dd1779dda7c907668a0b9ef833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 12 Feb 2019 09:52:05 +0100 Subject: [PATCH 0450/1861] i40e: add tracking of AF_XDP ZC state for each queue pair MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit f3fef2b6e1cc ("i40e: Remove umem from VSI") a regression was introduced; When the VSI was reset, the setup code would try to enable AF_XDP ZC unconditionally (as long as there was a umem placed in the netdev._rx struct). Here, we add a bitmap to the VSI that tracks if a certain queue pair has been "zero-copy enabled" via the ndo_bpf. The bitmap is used in i40e_xsk_umem, and enables zero-copy if and only if XDP is enabled, the corresponding qid in the bitmap is set and the umem is non-NULL. 
Fixes: f3fef2b6e1cc ("i40e: Remove umem from VSI") Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++++- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index cc583ad5236b7..d3cc3427caad1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -790,6 +790,8 @@ struct i40e_vsi { /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); + + unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dd77793a08a0d..b1c265012c8ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3077,7 +3077,7 @@ static struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) if (ring_is_xdp(ring)) qid -= ring->vsi->alloc_queue_pairs; - if (!xdp_on) + if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) return NULL; return xdp_get_umem_from_qid(ring->vsi->netdev, qid); @@ -10084,6 +10084,12 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; + if (type == I40E_VSI_MAIN) { + vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL); + if (!vsi->af_xdp_zc_qps) + goto err_rings; + } + ret = i40e_set_num_rings_in_vsi(vsi); if (ret) goto err_rings; @@ -10102,6 +10108,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) goto unlock_pf; err_rings: + bitmap_free(vsi->af_xdp_zc_qps); pf->next_vsi = i - 1; kfree(vsi); unlock_pf: @@ -10182,6 +10189,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); + bitmap_free(vsi->af_xdp_zc_qps); i40e_vsi_free_arrays(vsi, true); i40e_clear_rss_config_user(vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index b5c182e688e35..1b17486543ac7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -102,6 +102,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, if (err) return err; + set_bit(qid, vsi->af_xdp_zc_qps); + if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); if (if_running) { @@ -148,6 +150,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) return err; } + clear_bit(qid, vsi->af_xdp_zc_qps); i40e_xsk_umem_dma_unmap(vsi, umem); if (if_running) { -- GitLab From 2f94a3125b8742b05a011d62b16f52eb8f9ebe1c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 28 Mar 2019 11:20:02 +1000 Subject: [PATCH 0451/1861] cifs: fix kref underflow in close_shroot() Fix a bug where we used to not initialize the cached fid structure at all in open_shroot() if the open was successful but we did not get a lease. This would leave the structure uninitialized and later when we close the handle we would in close_shroot() try to kref_put() an uninitialized refcount. Fix this by always initializing this structure if the open was successful but only do the extra get() if we got a lease. 
This extra get() is only used to hold the structure until we get a lease break from the server at which point we will kref_put() it during lease processing. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2ops.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1022a3771e140..7cfafac255aad 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -717,20 +717,18 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) oparms.fid->mid = le64_to_cpu(o_rsp->sync_hdr.MessageId); #endif /* CIFS_DEBUG2 */ - if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) - oplock = smb2_parse_lease_state(server, o_rsp, - &oparms.fid->epoch, - oparms.fid->lease_key); - else - goto oshr_exit; - - memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid)); tcon->crfid.tcon = tcon; tcon->crfid.is_valid = true; kref_init(&tcon->crfid.refcount); - kref_get(&tcon->crfid.refcount); + if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) { + kref_get(&tcon->crfid.refcount); + oplock = smb2_parse_lease_state(server, o_rsp, + &oparms.fid->epoch, + oparms.fid->lease_key); + } else + goto oshr_exit; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) -- GitLab From 153322f7536a181e4d1b288aa6f01c0ce65f5c7c Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Mar 2019 22:32:49 -0500 Subject: [PATCH 0452/1861] smb3: Fix enumerating snapshots to Azure Some servers (see MS-SMB2 protocol specification section 3.3.5.15.1) expect that the FSCTL enumerate snapshots is done twice, with the first query having EXACTLY the minimum size response buffer requested (16 bytes) which refreshes the snapshot list (otherwise that and subsequent queries get an empty list returned). So had to add code to set the maximum response size differently for the first snapshot query (which gets the size needed for the second query which contains the actual list of snapshots). 
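To make the two-query buffer sizing concrete, a sketch of the selection logic (identifiers as in the hunks below; the exact control flow in smb3_enum_snapshots() may differ slightly):

	u32 max_response_size;

	/* the caller's (empty) snapshot array carries the response size */
	if (get_user(ret_data_len, (unsigned int __user *)ioc_buf))
		return -EFAULT;

	/*
	 * First query: request EXACTLY 16 bytes back so the server
	 * refreshes its snapshot list; later queries use a full buffer.
	 */
	if (ret_data_len == 0)
		max_response_size = MIN_SNAPSHOT_ARRAY_SIZE;
	else
		max_response_size = CIFSMaxBufSize;

	rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
			cfile->fid.volatile_fid,
			FSCTL_SRV_ENUMERATE_SNAPSHOTS,
			true /* is_fsctl */, NULL, 0 /* no input data */,
			max_response_size, (char **)&retbuf, &ret_data_len);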
Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky CC: Stable # 4.19+ --- fs/cifs/smb2file.c | 2 +- fs/cifs/smb2ops.c | 44 ++++++++++++++++++++++++++++++++------------ fs/cifs/smb2pdu.c | 35 ++++++++++++++++++++++------------- fs/cifs/smb2proto.h | 5 +++-- 4 files changed, 58 insertions(+), 28 deletions(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b204e84b87fb5..ac97e0c01d4ef 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -74,7 +74,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true /* is_fsctl */, (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), - NULL, NULL /* no return info */); + CIFSMaxBufSize, NULL, NULL /* no return info */); if (rc == -EOPNOTSUPP) { cifs_dbg(VFS, "resiliency not supported by server, disabling\n"); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 7cfafac255aad..b22be10ee980b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -581,7 +581,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, NULL /* no data input */, 0 /* no data input */, - (char **)&out_buf, &ret_data_len); + CIFSMaxBufSize, (char **)&out_buf, &ret_data_len); if (rc == -EOPNOTSUPP) { cifs_dbg(FYI, "server does not support query network interfaces\n"); @@ -1297,7 +1297,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, - NULL, 0 /* no input */, + NULL, 0 /* no input */, CIFSMaxBufSize, (char **)&res_key, &ret_data_len); if (rc) { @@ -1402,7 +1402,7 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_ioctl_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID, qi.info_type, true, NULL, - 0); + 0, CIFSMaxBufSize); } } else if (qi.flags == PASSTHRU_QUERY_INFO) { memset(&qi_iov, 0, sizeof(qi_iov)); @@ -1530,8 +1530,8 @@ smb2_copychunk_range(const unsigned int xid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, true /* is_fsctl */, (char *)pcchunk, - sizeof(struct copychunk_ioctl), (char **)&retbuf, - &ret_data_len); + sizeof(struct copychunk_ioctl), CIFSMaxBufSize, + (char **)&retbuf, &ret_data_len); if (rc == 0) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) { @@ -1691,7 +1691,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_SPARSE, true /* is_fctl */, - &setsparse, 1, NULL, NULL); + &setsparse, 1, CIFSMaxBufSize, NULL, NULL); if (rc) { tcon->broken_sparse_sup = true; cifs_dbg(FYI, "set sparse rc = %d\n", rc); @@ -1764,7 +1764,7 @@ smb2_duplicate_extents(const unsigned int xid, true /* is_fsctl */, (char *)&dup_ext_buf, sizeof(struct duplicate_extents_to_file), - NULL, + CIFSMaxBufSize, NULL, &ret_data_len); if (ret_data_len > 0) @@ -1799,7 +1799,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, true /* is_fsctl */, (char *)&integr_info, sizeof(struct fsctl_set_integrity_information_req), - NULL, + CIFSMaxBufSize, NULL, &ret_data_len); } @@ -1807,6 +1807,8 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, /* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */ #define GMT_TOKEN_SIZE 50 +#define MIN_SNAPSHOT_ARRAY_SIZE 16 /* See MS-SMB2 
section 3.3.5.15.1 */ + /* * Input buffer contains (empty) struct smb_snapshot array with size filled in * For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2 @@ -1818,13 +1820,29 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, char *retbuf = NULL; unsigned int ret_data_len = 0; int rc; + u32 max_response_size; struct smb_snapshot_array snapshot_in; + if (get_user(ret_data_len, (unsigned int __user *)ioc_buf)) + return -EFAULT; + + /* + * Note that for snapshot queries that servers like Azure expect that + * the first query be minimal size (and just used to get the number/size + * of previous versions) so response size must be specified as EXACTLY + * sizeof(struct snapshot_array) which is 16 when rounded up to multiple + * of eight bytes. + */ + if (ret_data_len == 0) + max_response_size = MIN_SNAPSHOT_ARRAY_SIZE; + else + max_response_size = CIFSMaxBufSize; + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SRV_ENUMERATE_SNAPSHOTS, true /* is_fsctl */, - NULL, 0 /* no input data */, + NULL, 0 /* no input data */, max_response_size, (char **)&retbuf, &ret_data_len); cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n", @@ -2302,7 +2320,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_DFS_GET_REFERRALS, true /* is_fsctl */, - (char *)dfs_req, dfs_req_size, + (char *)dfs_req, dfs_req_size, CIFSMaxBufSize, (char **)&dfs_rsp, &dfs_rsp_size); } while (rc == -EAGAIN); @@ -2656,7 +2674,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl_init(tcon, &rqst[num++], cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true /* is_fctl */, (char *)&fsctl_buf, - sizeof(struct file_zero_data_information)); + sizeof(struct file_zero_data_information), + CIFSMaxBufSize); if (rc) goto zero_range_exit; @@ -2733,7 +2752,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true /* is_fctl */, (char *)&fsctl_buf, - sizeof(struct file_zero_data_information), NULL, NULL); + sizeof(struct file_zero_data_information), + CIFSMaxBufSize, NULL, NULL); free_xid(xid); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 21ac19ff19cb2..0dc66f36ff5b4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1002,7 +1002,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen); + (char *)pneg_inbuf, inbuflen, CIFSMaxBufSize, + (char **)&pneg_rsp, &rsplen); if (rc == -EOPNOTSUPP) { /* * Old Windows versions or Netapp SMB server can return @@ -2478,7 +2479,8 @@ creat_exit: int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen) + bool is_fsctl, char *in_data, u32 indatalen, + __u32 max_response_size) { struct smb2_ioctl_req *req; struct kvec *iov = rqst->rq_iov; @@ -2520,16 +2522,21 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, req->OutputCount = 0; /* MBZ */ /* - * Could increase MaxOutputResponse, but that would require more - * than one credit. 
Windows typically sets this smaller, but for some + * In most cases max_response_size is set to 16K (CIFSMaxBufSize) + * We Could increase default MaxOutputResponse, but that could require + * more credits. Windows typically sets this smaller, but for some * ioctls it may be useful to allow server to send more. No point * limiting what the server can send as long as fits in one credit - * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE - * (by default, note that it can be overridden to make max larger) - * in responses (except for read responses which can be bigger. - * We may want to bump this limit up + * We can not handle more than CIFS_MAX_BUF_SIZE yet but may want + * to increase this limit up in the future. + * Note that for snapshot queries that servers like Azure expect that + * the first query be minimal size (and just used to get the number/size + * of previous versions) so response size must be specified as EXACTLY + * sizeof(struct snapshot_array) which is 16 when rounded up to multiple + * of eight bytes. Currently that is the only case where we set max + * response size smaller. */ - req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize); + req->MaxOutputResponse = cpu_to_le32(max_response_size); if (is_fsctl) req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); @@ -2550,13 +2557,14 @@ SMB2_ioctl_free(struct smb_rqst *rqst) cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ } + /* * SMB2 IOCTL is used for both IOCTLs and FSCTLs */ int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, - char *in_data, u32 indatalen, + char *in_data, u32 indatalen, u32 max_out_data_len, char **out_data, u32 *plen /* returned data len */) { struct smb_rqst rqst; @@ -2593,8 +2601,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rqst.rq_iov = iov; rqst.rq_nvec = SMB2_IOCTL_IOV_SIZE; - rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid, - opcode, is_fsctl, in_data, indatalen); + rc = SMB2_ioctl_init(tcon, &rqst, persistent_fid, volatile_fid, opcode, + is_fsctl, in_data, indatalen, max_out_data_len); if (rc) goto ioctl_exit; @@ -2672,7 +2680,8 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, (char *)&fsctl_input /* data input */, - 2 /* in data len */, &ret_data /* out data */, NULL); + 2 /* in data len */, CIFSMaxBufSize /* max out data */, + &ret_data /* out data */, NULL); cifs_dbg(FYI, "set compression rc %d\n", rc); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 3c32d0cfea69b..52df125e91898 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -142,11 +142,12 @@ extern int SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, extern void SMB2_open_free(struct smb_rqst *rqst); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, + bool is_fsctl, char *in_data, u32 indatalen, u32 maxoutlen, char **out_data, u32 *plen /* returned data len */); extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen); + bool is_fsctl, char *in_data, u32 indatalen, + __u32 max_response_size); extern void SMB2_ioctl_free(struct smb_rqst *rqst); extern int SMB2_close(const unsigned int xid, struct 
cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); -- GitLab From ca567eb2b3f014d5be0f44c6f68b01a522f15ca4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 29 Mar 2019 16:31:07 -0500 Subject: [PATCH 0453/1861] SMB3: Allow persistent handle timeout to be configurable on mount Reconnecting after server or network failure can be improved (to maintain availability and protect data integrity) by allowing the client to choose the default persistent (or resilient) handle timeout in some use cases. Today we default to 0 which lets the server pick the default timeout (usually 120 seconds) but this can be problematic for some workloads. Add the new mount parameter to cifs.ko for SMB3 mounts "handletimeout" which enables the user to override the default handle timeout for persistent (mount option "persistenthandles") or resilient handles (mount option "resilienthandles"). Maximum allowed is 16 minutes (960000 ms). Units for the timeout are expressed in milliseconds. See section 2.2.14.2.12 and 2.2.31.3 of the MS-SMB2 protocol specification for more information. Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/cifsfs.c | 2 ++ fs/cifs/cifsglob.h | 8 ++++++++ fs/cifs/connect.c | 30 +++++++++++++++++++++++++++++- fs/cifs/smb2file.c | 4 +++- fs/cifs/smb2pdu.c | 14 +++++++++++--- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f9b71c12cc9f6..a05bf1d6e1d04 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -559,6 +559,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) tcon->ses->server->echo_interval / HZ); if (tcon->snapshot_time) seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); + if (tcon->handle_timeout) + seq_printf(s, ",handletimeout=%u", tcon->handle_timeout); /* convert actimeo and display it in seconds */ seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38feae812b470..5b18d45857409 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -59,6 +59,12 @@ */ #define CIFS_MAX_ACTIMEO (1 << 30) +/* + * Max persistent and resilient handle timeout (milliseconds). + * Windows durable max was 960000 (16 minutes) + */ +#define SMB3_MAX_HANDLE_TIMEOUT 960000 + /* * MAX_REQ is the maximum number of requests that WE will send * on one socket concurrently. 
@@ -586,6 +592,7 @@ struct smb_vol { struct nls_table *local_nls; unsigned int echo_interval; /* echo interval in secs */ __u64 snapshot_time; /* needed for timewarp tokens */ + __u32 handle_timeout; /* persistent and durable handle timeout in ms */ unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */ }; @@ -1058,6 +1065,7 @@ struct cifs_tcon { __u32 vol_serial_number; __le64 vol_create_time; __u64 snapshot_time; /* for timewarp tokens - timestamp of snapshot */ + __u32 handle_timeout; /* persistent and durable handle timeout in ms */ __u32 ss_flags; /* sector size flags */ __u32 perf_sector_size; /* best sector size for perf */ __u32 max_chunks; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a8e9738db6912..4c0e44489f214 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -103,7 +103,7 @@ enum { Opt_cruid, Opt_gid, Opt_file_mode, Opt_dirmode, Opt_port, Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo, - Opt_echo_interval, Opt_max_credits, + Opt_echo_interval, Opt_max_credits, Opt_handletimeout, Opt_snapshot, /* Mount options which take string value */ @@ -208,6 +208,7 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_rsize, "rsize=%s" }, { Opt_wsize, "wsize=%s" }, { Opt_actimeo, "actimeo=%s" }, + { Opt_handletimeout, "handletimeout=%s" }, { Opt_echo_interval, "echo_interval=%s" }, { Opt_max_credits, "max_credits=%s" }, { Opt_snapshot, "snapshot=%s" }, @@ -1619,6 +1620,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->actimeo = CIFS_DEF_ACTIMEO; + /* Most clients set timeout to 0, allows server to use its default */ + vol->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */ + /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */ vol->ops = &smb30_operations; vol->vals = &smbdefault_values; @@ -2017,6 +2021,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } break; + case Opt_handletimeout: + if (get_option_ul(args, &option)) { + cifs_dbg(VFS, "%s: Invalid handletimeout value\n", + __func__); + goto cifs_parse_mount_err; + } + vol->handle_timeout = option; + if (vol->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) { + cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n"); + goto cifs_parse_mount_err; + } + break; case Opt_echo_interval: if (get_option_ul(args, &option)) { cifs_dbg(VFS, "%s: Invalid echo interval value\n", @@ -3183,6 +3199,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info) return 0; if (tcon->snapshot_time != volume_info->snapshot_time) return 0; + if (tcon->handle_timeout != volume_info->handle_timeout) + return 0; return 1; } @@ -3297,6 +3315,16 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon->snapshot_time = volume_info->snapshot_time; } + if (volume_info->handle_timeout) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "Use SMB2.1 or later for handle timeout option\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } else + tcon->handle_timeout = volume_info->handle_timeout; + } + tcon->ses = ses; if (volume_info->password) { tcon->password = kstrdup(volume_info->password, GFP_KERNEL); diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index ac97e0c01d4ef..54bffb2a1786d 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -68,7 +68,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (oparms->tcon->use_resilient) { - nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ + /* default timeout is 0, 
servers pick default (120 seconds) */ + nr_ioctl_req.Timeout = + cpu_to_le32(oparms->tcon->handle_timeout); nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0dc66f36ff5b4..21ad01d55ab2d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1859,8 +1859,9 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, } static struct create_durable_v2 * -create_durable_v2_buf(struct cifs_fid *pfid) +create_durable_v2_buf(struct cifs_open_parms *oparms) { + struct cifs_fid *pfid = oparms->fid; struct create_durable_v2 *buf; buf = kzalloc(sizeof(struct create_durable_v2), GFP_KERNEL); @@ -1874,7 +1875,14 @@ create_durable_v2_buf(struct cifs_fid *pfid) (struct create_durable_v2, Name)); buf->ccontext.NameLength = cpu_to_le16(4); - buf->dcontext.Timeout = 0; /* Should this be configurable by workload */ + /* + * NB: Handle timeout defaults to 0, which allows server to choose + * (most servers default to 120 seconds) and most clients default to 0. + * This can be overridden at mount ("handletimeout=") if the user wants + * a different persistent (or resilient) handle timeout for all opens + * opens on a particular SMB3 mount. + */ + buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout); buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); generate_random_uuid(buf->dcontext.CreateGuid); memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); @@ -1927,7 +1935,7 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = create_durable_v2_buf(oparms->fid); + iov[num].iov_base = create_durable_v2_buf(oparms); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = sizeof(struct create_durable_v2); -- GitLab From 4811e3096daaa56e145a1d2bec45e2e9fe790729 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 1 Apr 2019 09:53:44 +1000 Subject: [PATCH 0454/1861] cifs: a smb2_validate_and_copy_iov failure does not mean the handle is invalid. It only means that we do not have a valid cached value for the file_all_info structure. 
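In other words, the copy becomes best-effort; a simplified sketch of the post-patch flow in open_shroot() (condensed from the hunk below):

	/* a failure here is not fatal for the open: just skip caching */
	if (!smb2_validate_and_copy_iov(le16_to_cpu(qi_rsp->OutputBufferOffset),
					sizeof(struct smb2_file_all_info),
					&rsp_iov[1],
					sizeof(struct smb2_file_all_info),
					(char *)&tcon->crfid.file_all_info))
		tcon->crfid.file_all_info_is_valid = 1;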
CC: Stable Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b22be10ee980b..00225e699d036 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -733,14 +733,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) goto oshr_exit; - rc = smb2_validate_and_copy_iov( + if (!smb2_validate_and_copy_iov( le16_to_cpu(qi_rsp->OutputBufferOffset), sizeof(struct smb2_file_all_info), &rsp_iov[1], sizeof(struct smb2_file_all_info), - (char *)&tcon->crfid.file_all_info); - if (rc) - goto oshr_exit; - tcon->crfid.file_all_info_is_valid = 1; + (char *)&tcon->crfid.file_all_info)) + tcon->crfid.file_all_info_is_valid = 1; oshr_exit: mutex_unlock(&tcon->crfid.fid_mutex);
-- GitLab From 0d74e6a3b6421d98eeafbed26f29156d469bc0b5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 13 Mar 2019 07:56:02 -0400 Subject: [PATCH 0455/1861] dm integrity: change memcmp to strncmp in dm_integrity_ctr If the string opt_string is small, the function memcmp can access bytes that are beyond the terminating nul character. In theory, it could cause a segfault if opt_string were located just below some unmapped memory. Change from memcmp to strncmp so that we don't read bytes beyond the end of the string. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index d57d997a52c81..33fac437569f0 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3185,7 +3185,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) { + else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) { if (ic->meta_dev) { dm_put_device(ti, ic->meta_dev); ic->meta_dev = NULL; @@ -3204,17 +3204,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } ic->sectors_per_block = val >> SECTOR_SHIFT; - } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, "Invalid internal_hash argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { + } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, "Invalid journal_crypt argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { + } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, "Invalid journal_mac argument"); if (r)
-- GitLab From 5efedc9b62b5cf0ccc84ed427a07f0d2485091c4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:16:34 +0800 Subject: [PATCH 0456/1861] dm integrity: make dm_integrity_init and dm_integrity_exit static Fix sparse warnings: drivers/md/dm-integrity.c:3619:12: warning: symbol 'dm_integrity_init' was not declared. Should it be static? drivers/md/dm-integrity.c:3638:6: warning: symbol 'dm_integrity_exit' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 33fac437569f0..94b865c5ce715 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3616,7 +3616,7 @@ static struct target_type integrity_target = { .io_hints = dm_integrity_io_hints, }; -int __init dm_integrity_init(void) +static int __init dm_integrity_init(void) { int r; @@ -3635,7 +3635,7 @@ int __init dm_integrity_init(void) return r; } -void dm_integrity_exit(void) +static void __exit dm_integrity_exit(void) { dm_unregister_target(&integrity_target); kmem_cache_destroy(journal_io_cache);
-- GitLab From 93fc91675a6c84d6ab355188aea398bda2cc51f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Mar 2019 15:00:09 -0700 Subject: [PATCH 0457/1861] dm init: fix const confusion for dm_allowed_targets array A non-const pointer to const cannot be marked initconst. Mark the array actually const. Fixes: 6bbc923dfcf5 ("dm: add support to directly boot to a mapped device") Signed-off-by: Andi Kleen Signed-off-by: Mike Snitzer --- drivers/md/dm-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b53f30f16b4d4..4b76f84424c3c 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -36,7 +36,7 @@ struct dm_device { struct list_head list; }; -const char *dm_allowed_targets[] __initconst = { +const char * const dm_allowed_targets[] __initconst = { "crypt", "delay", "linear",
-- GitLab From 75ae193626de3238ca5fb895868ec91c94e63b1b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 21 Mar 2019 16:46:12 -0400 Subject: [PATCH 0458/1861] dm: revert 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") The limit was already incorporated into dm-crypt with commit 4e870e948fba ("dm crypt: fix error with too large bios"), so we don't need to apply it globally to all targets. The quantity BIO_MAX_PAGES * PAGE_SIZE is wrong anyway because the variable ti->max_io_len is supposed to be in units of 512-byte sectors, not bytes. Reduction of the limit to 1048576 sectors could even cause data corruption in rare cases - suppose that we have a dm-striped device with stripe size 768MiB. The target will call dm_set_target_max_io_len with the value 1572864. The buggy code would reduce it to 1048576. Now, the dm-core will erroneously split the bios on a 1048576-sector boundary instead of the 1572864-sector boundary and pass these stripe-crossing bios to the striped target. Cc: stable@vger.kernel.org # v4.16+ Fixes: 8f50e358153d ("dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE") Signed-off-by: Mikulas Patocka Acked-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 68d24056d0b1c..89b04169658d2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1042,15 +1042,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) return -EINVAL; } - /* - * BIO based queue uses its own splitting. When multipage bvecs - * is switched on, size of the incoming bio may be too big to - * be handled in some targets, such as crypt.
- * - * When these targets are ready for the big bio, we can remove - * the limit. - */ - ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE); + ti->max_io_len = (uint32_t) len; return 0; } -- GitLab From eb40c0acdc342b815d4d03ae6abb09e80c0f2988 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 26 Mar 2019 20:20:58 +0100 Subject: [PATCH 0459/1861] dm table: propagate BDI_CAP_STABLE_WRITES to fix sporadic checksum errors Some devices don't use blk_integrity but still want stable pages because they do their own checksumming. Examples include rbd and iSCSI when data digests are negotiated. Stacking DM (and thus LVM) on top of these devices results in sporadic checksum errors. Set BDI_CAP_STABLE_WRITES if any underlying device has it set. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ba9481f1bf3c0..cde3b49b2a910 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1844,6 +1844,36 @@ static bool dm_table_supports_secure_erase(struct dm_table *t) return true; } +static int device_requires_stable_pages(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && bdi_cap_stable_pages_required(q->backing_dev_info); +} + +/* + * If any underlying device requires stable pages, a table must require + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. + */ +static bool dm_table_requires_stable_pages(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) + return true; + } + + return false; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1896,6 +1926,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_verify_integrity(t); + /* + * Some devices don't use blk_integrity but still want stable pages + * because they do their own checksumming. + */ + if (dm_table_requires_stable_pages(t)) + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + else + q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + /* * Determine whether or not this queue's I/O timings contribute * to the entropy pool, Only request-based targets use this. -- GitLab From 556a888a14afe27164191955618990fb3ccc9aad Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 30 Mar 2019 03:12:32 +0100 Subject: [PATCH 0460/1861] signal: don't silently convert SI_USER signals to non-current pidfd The current sys_pidfd_send_signal() silently turns signals with explicit SI_USER context that are sent to non-current tasks into signals with kernel-generated siginfo. This is unlike do_rt_sigqueueinfo(), which returns -EPERM in this case. If a user actually wants to send a signal with kernel-provided siginfo, they can do that with pidfd_send_signal(pidfd, sig, NULL, 0); so allowing this case is unnecessary. Instead of silently replacing the siginfo, just bail out with an error; this is consistent with other interfaces and avoids special-casing behavior based on security checks. 
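For reference, a caller that genuinely wants kernel-generated siginfo can still ask for it explicitly with a NULL info pointer, as the message above notes. A minimal userspace sketch (assumptions: glibc provides no wrapper yet, hence the raw syscall, and __NR_pidfd_send_signal comes from this release's uapi headers; my_pidfd_send_signal is just an illustrative name):

	#define _GNU_SOURCE
	#include <signal.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int my_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
	{
		/* info == NULL: the kernel fills in siginfo itself */
		return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
	}

	/* e.g. my_pidfd_send_signal(pidfd, SIGTERM, NULL, 0); */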
Fixes: 3eb39f47934f ("signal: add pidfd_send_signal() syscall") Signed-off-by: Jann Horn Signed-off-by: Christian Brauner --- kernel/signal.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index b7953934aa994..f98448cf2defb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3605,16 +3605,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (unlikely(sig != kinfo.si_signo)) goto err; + /* Only allow sending arbitrary signals to yourself. */ + ret = -EPERM; if ((task_pid(current) != pid) && - (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) { - /* Only allow sending arbitrary signals to yourself. */ - ret = -EPERM; - if (kinfo.si_code != SI_USER) - goto err; - - /* Turn this into a regular kill signal. */ - prepare_kill_siginfo(sig, &kinfo); - } + (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) + goto err; } else { prepare_kill_siginfo(sig, &kinfo); } -- GitLab From 0db6f8befc32c68bb13d7ffbb2e563c79e913e13 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 28 Mar 2019 10:35:06 +0100 Subject: [PATCH 0461/1861] net/sched: fix ->get helper of the matchall cls It returned always NULL, thus it was never possible to get the filter. Example: $ ip link add foo type dummy $ ip link add bar type dummy $ tc qdisc add dev foo clsact $ tc filter add dev foo protocol all pref 1 ingress handle 1234 \ matchall action mirred ingress mirror dev bar Before the patch: $ tc filter get dev foo protocol all pref 1 ingress handle 1234 matchall Error: Specified filter handle not found. We have an error talking to the kernel After: $ tc filter get dev foo protocol all pref 1 ingress handle 1234 matchall filter ingress protocol all pref 1 matchall chain 0 handle 0x4d2 not_in_hw action order 1: mirred (Ingress Mirror to device bar) pipe index 1 ref 1 bind 1 CC: Yotam Gigi CC: Jiri Pirko Fixes: fd62d9f5c575 ("net/sched: matchall: Fix configuration race") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 459921bd3d87b..a13bc351a4148 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -130,6 +130,11 @@ static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, static void *mall_get(struct tcf_proto *tp, u32 handle) { + struct cls_mall_head *head = rtnl_dereference(tp->root); + + if (head && head->handle == handle) + return head; + return NULL; } -- GitLab From 4ab526468344c11d2d1807ae95feb1f5305dc014 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 1 Apr 2019 17:03:45 +0200 Subject: [PATCH 0462/1861] cpufreq/intel_pstate: Load only on Intel hardware This driver is Intel-only so loading on anything which is not Intel is pointless. Prevent it from doing so. While at it, correct the "not supported" print statement to say CPU "model" which is what that test does. Fixes: 076b862c7e44 (cpufreq: intel_pstate: Add reasons for failure and debug messages) Suggested-by: Erwan Velu Signed-off-by: Borislav Petkov Reviewed-by: Thomas Renninger Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b599c7318aab4..2986119dd31fb 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2596,6 +2596,9 @@ static int __init intel_pstate_init(void) const struct x86_cpu_id *id; int rc; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + if (no_load) return -ENODEV; @@ -2611,7 +2614,7 @@ static int __init intel_pstate_init(void) } else { id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) { - pr_info("CPU ID not supported\n"); + pr_info("CPU model not supported\n"); return -ENODEV; } -- GitLab From 5a25e3f7cc538fb49e11267c1e41c54ccf83835e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:15:13 +0100 Subject: [PATCH 0463/1861] cpufreq: intel_pstate: Driver-specific handling of _PPC updates In some cases, the platform firmware disables or enables turbo frequencies for all CPUs globally before triggering a _PPC change notification for one of them. Obviously, that global change affects all CPUs, not just the notified one, and it needs to be acted upon by cpufreq. The intel_pstate driver is able to detect such global changes of the settings, but it also needs to update policy limits for all CPUs if that happens, in particular if turbo frequencies are enabled globally - to allow them to be used. For this reason, introduce a new cpufreq driver callback to be invoked on _PPC notifications, if present, instead of simply calling cpufreq_update_policy() for the notified CPU and make intel_pstate use it to trigger policy updates for all CPUs in the system if global settings change. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200759 Reported-by: Gabriele Mazzotta Tested-by: Gabriele Mazzotta Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/acpi/processor_perflib.c | 2 +- drivers/cpufreq/cpufreq.c | 16 ++++++++++++++++ drivers/cpufreq/intel_pstate.c | 24 ++++++++++++++++++++++++ include/linux/cpufreq.h | 4 ++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index a303fd0e108cc..c73d3a62799a6 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -181,7 +181,7 @@ void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag) acpi_processor_ppc_ost(pr->handle, 0); } if (ret >= 0) - cpufreq_update_policy(pr->id); + cpufreq_update_limits(pr->id); } int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e10922709d139..bb63347f6af14 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2370,6 +2370,22 @@ unlock: } EXPORT_SYMBOL(cpufreq_update_policy); +/** + * cpufreq_update_limits - Update policy limits for a given CPU. + * @cpu: CPU to update the policy limits for. + * + * Invoke the driver's ->update_limits callback if present or call + * cpufreq_update_policy() for @cpu. 
+ */ +void cpufreq_update_limits(unsigned int cpu) +{ + if (cpufreq_driver->update_limits) + cpufreq_driver->update_limits(cpu); + else + cpufreq_update_policy(cpu); +} +EXPORT_SYMBOL_GPL(cpufreq_update_limits); + /********************************************************************* * BOOST * *********************************************************************/ diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b599c7318aab4..e2191a570adeb 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,6 +179,7 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. + * @turbo_disabled_s: Saved @turbo_disabled value. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -187,6 +188,7 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; + bool turbo_disabled_s; int max_perf_pct; int min_perf_pct; }; @@ -897,6 +899,25 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void intel_pstate_update_limits(unsigned int cpu) +{ + mutex_lock(&intel_pstate_driver_lock); + + update_turbo_state(); + /* + * If turbo has been turned on or off globally, policy limits for + * all CPUs need to be updated to reflect that. + */ + if (global.turbo_disabled_s != global.turbo_disabled) { + global.turbo_disabled_s = global.turbo_disabled; + intel_pstate_update_policies(); + } else { + cpufreq_update_policy(cpu); + } + + mutex_unlock(&intel_pstate_driver_lock); +} + /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ @@ -2138,6 +2159,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); + global.turbo_disabled_s = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? 
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2182,6 +2204,7 @@ static struct cpufreq_driver intel_pstate = { .init = intel_pstate_cpu_init, .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, + .update_limits = intel_pstate_update_limits, .name = "intel_pstate", }; @@ -2316,6 +2339,7 @@ static struct cpufreq_driver intel_cpufreq = { .init = intel_cpufreq_cpu_init, .exit = intel_pstate_cpu_exit, .stop_cpu = intel_cpufreq_stop_cpu, + .update_limits = intel_pstate_update_limits, .name = "intel_cpufreq", }; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b160e98076e3b..5005ea40364fc 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -195,6 +195,7 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void cpufreq_update_policy(unsigned int cpu); +void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); @@ -322,6 +323,9 @@ struct cpufreq_driver { /* should be defined, if possible */ unsigned int (*get)(unsigned int cpu); + /* Called to update policy limits on firmware notifications. */ + void (*update_limits)(unsigned int cpu); + /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); -- GitLab From 540a375822a40675868c5f62ae57b608a579e56a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:16:58 +0100 Subject: [PATCH 0464/1861] cpufreq: Add cpufreq_cpu_acquire() and cpufreq_cpu_release() It sometimes is necessary to find a cpufreq policy for a given CPU and acquire its rwsem (for writing) immediately after that, so introduce cpufreq_cpu_acquire() as a helper for that and the complementary cpufreq_cpu_release(). Make cpufreq_update_policy() use the new functions. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 56 ++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bb63347f6af14..d8fc395af7737 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -250,6 +250,51 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); +/** + * cpufreq_cpu_release - Unlock a policy and decrement its usage counter. + * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). + */ +static void cpufreq_cpu_release(struct cpufreq_policy *policy) +{ + if (WARN_ON(!policy)) + return; + + lockdep_assert_held(&policy->rwsem); + + up_write(&policy->rwsem); + + cpufreq_cpu_put(policy); +} + +/** + * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it. + * @cpu: CPU to find the policy for. + * + * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and + * if the policy returned by it is not NULL, acquire its rwsem for writing. + * Return the policy if it is active or release it and return NULL otherwise. + * + * The policy returned by this function has to be released with the help of + * cpufreq_cpu_release() in order to release its rwsem and balance its usage + * counter properly. 
+ */ +static struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + + if (!policy) + return NULL; + + down_write(&policy->rwsem); + + if (policy_is_inactive(policy)) { + cpufreq_cpu_release(policy); + return NULL; + } + + return policy; +} + /********************************************************************* * EXTERNALLY AFFECTING FREQUENCY CHANGES * *********************************************************************/ @@ -2337,17 +2382,12 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, */ void cpufreq_update_policy(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); struct cpufreq_policy new_policy; if (!policy) return; - down_write(&policy->rwsem); - - if (policy_is_inactive(policy)) - goto unlock; - /* * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change @@ -2364,9 +2404,7 @@ void cpufreq_update_policy(unsigned int cpu) cpufreq_set_policy(policy, &new_policy); unlock: - up_write(&policy->rwsem); - - cpufreq_cpu_put(policy); + cpufreq_cpu_release(policy); } EXPORT_SYMBOL(cpufreq_update_policy);
-- GitLab From c324f43aed89b33eee37aaf022b32c31a2b3104d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 28 Mar 2019 14:33:58 +0100 Subject: [PATCH 0465/1861] cpuidle: exynos: Unify target residency for AFTR and coupled AFTR states Since commit 45f1ff59e27c ("cpuidle: Return nohz hint from cpuidle_select()") the Exynos CPUidle driver stopped entering C1 (AFTR) mode on the Exynos4412-based Trats2 board. Further analysis revealed that the CPUidle framework changed the way it handles predicted timer ticks and reported target residency for the given idle states. As a result, the C1 (AFTR) state was not chosen anymore on a completely idle device. The main issue was a too high target residency value. The similar C1 (AFTR) state for the 'coupled' CPUidle version used a 10 times lower value for the target residency, despite the fact that it is the same state from the hardware perspective. The 100000us value for standard C1 (AFTR) mode has been there from the beginning of the support for this idle state, added by commit 67173ca492ab ("ARM: EXYNOS: Add support AFTR mode on EXYNOS4210"). That commit doesn't give any reason for it; instead it looks like it was blindly copied from the WFI/IDLE state of the same driver at that time. At that time, that value was probably not really used by the framework for any critical decision, so it didn't matter that much. Now it turned out to be an issue, so unify the target residency with the 'coupled' version, as it seems to better match the real use case values and restores the operation of the Exynos CPUidle driver on the idle device. Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Acked-by: Daniel Lezcano Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-exynos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c index 0171a6e190d71..f7199a35cbb65 100644 --- a/drivers/cpuidle/cpuidle-exynos.c +++ b/drivers/cpuidle/cpuidle-exynos.c @@ -84,7 +84,7 @@ static struct cpuidle_driver exynos_idle_driver = { [1] = { .enter = exynos_enter_lowpower, .exit_latency = 300, - .target_residency = 100000, + .target_residency = 10000, .name = "C1", .desc = "ARM power down", },
-- GitLab From 5dd431b6b92c0db324d134d2a4006dd4f87f2261 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Mar 2019 16:53:12 +0100 Subject: [PATCH 0466/1861] net: sched: introduce and use qstats read helpers Classful qdiscs can't directly access the child qdiscs' backlog length: if such a qdisc is NOLOCK, per-CPU values should be accounted instead. Most qdiscs do not respect the above. As a result, qstats fetching for most classful qdiscs is currently incorrect: if the child qdisc is NOLOCK, it always reports a backlog length of 0. This change introduces a pair of helpers to safely fetch both backlog and qlen and uses them in the stats class dumping functions, fixing the above issue and cleaning up the code a bit. DRR also needs to access the child qdisc queue length, so it needs custom handling. Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sch_generic.h | 18 ++++++++++++++++++ net/sched/sch_cbq.c | 4 +++- net/sched/sch_drr.c | 5 +++-- net/sched/sch_hfsc.c | 5 +++-- net/sched/sch_htb.c | 7 +++---- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 3 +-- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 3 +-- net/sched/sch_taprio.c | 2 +- 11 files changed, 36 insertions(+), 17 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7d1a0483a17ba..43e4e17aa938a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -923,6 +923,24 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } +static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch) +{ + __u32 qlen = qdisc_qlen_sum(sch); + + return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen); +} + +static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, + __u32 *backlog) +{ + struct gnet_stats_queue qstats = { 0 }; + __u32 len = qdisc_qlen_sum(sch); + + __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); + *qlen = qstats.qlen; + *backlog = qstats.backlog; +} + static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4dc05409e3fb2..651879c1b6558 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; + __u32 qlen; cl->xstats.avgidle = cl->avgidle; cl->xstats.undertime = 0; + qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; @@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) +
gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 09b8009910657..8a181591b0ea8 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -269,7 +269,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct drr_class *cl = (struct drr_class *)arg; - __u32 qlen = cl->qdisc->q.qlen; + __u32 qlen = qdisc_qlen_sum(cl->qdisc); + struct Qdisc *cl_q = cl->qdisc; struct tc_drr_stats xstats; memset(&xstats, 0, sizeof(xstats)); @@ -279,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) + gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 24cc220a3218a..a946a419d7173 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1328,8 +1328,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct hfsc_class *cl = (struct hfsc_class *)arg; struct tc_hfsc_stats xstats; + __u32 qlen; - cl->qstats.backlog = cl->qdisc->qstats.backlog; + qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog); xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; @@ -1337,7 +1338,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) + gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 30f9da7e10763..ed92836f528a9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) }; __u32 qlen = 0; - if (!cl->level && cl->leaf.q) { - qlen = cl->leaf.q->q.qlen; - qs.backlog = cl->leaf.q->qstats.backlog; - } + if (!cl->level && cl->leaf.q) + qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); + cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), INT_MIN, INT_MAX); cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 203659bc39064..3a3312467692c 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index d364e63c396d7..ea0dc112b38dd 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, - &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) 
return -1; } return 0; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7410ce4d03213..53c918a113786 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -344,7 +344,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 847141cd900f1..dfb06d5bfacc3 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -365,7 +365,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl_q->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) + qdisc_qstats_copy(d, cl_q) < 0) return -1; return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 29f5c4a246882..9fbda3ec58618 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -655,8 +655,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, - &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) + qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 206e4dbed12f0..c7041999eb5d3 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch = dev_queue->qdisc_sleeping; if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || - gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) + qdisc_qstats_copy(d, sch) < 0) return -1; return 0; } -- GitLab From e5f0e8f8e456589d56e4955154ed5d468cd6d286 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Mar 2019 16:53:13 +0100 Subject: [PATCH 0467/1861] net: sched: introduce and use qdisc tree flush/purge helpers The same code to flush qdisc tree and purge the qdisc queue is duplicated in many places and in most cases it does not respect NOLOCK qdisc: the global backlog len is used and the per CPU values are ignored. This change addresses the above, factoring-out the relevant code and using the helpers introduced by the previous patch to fetch the correct backlog len. Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array") Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 26 +++++++++++++++++++------- net/sched/sch_cbq.c | 6 +----- net/sched/sch_drr.c | 11 +---------- net/sched/sch_hfsc.c | 14 ++------------ net/sched/sch_htb.c | 15 +++------------ net/sched/sch_multiq.c | 8 +++----- net/sched/sch_prio.c | 8 ++------ net/sched/sch_qfq.c | 11 +---------- net/sched/sch_red.c | 3 +-- net/sched/sch_sfb.c | 3 +-- net/sched/sch_tbf.c | 3 +-- 11 files changed, 35 insertions(+), 73 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 43e4e17aa938a..a2b38b3deeca2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -941,6 +941,23 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, *backlog = qstats.backlog; } +static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + +static inline void qdisc_purge_queue(struct Qdisc *sch) +{ + __u32 qlen, backlog; + + qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); + qdisc_reset(sch); + qdisc_tree_reduce_backlog(sch, qlen, backlog); +} + static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { qh->head = NULL; @@ -1124,13 +1141,8 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, sch_tree_lock(sch); old = *pold; *pold = new; - if (old != NULL) { - unsigned int qlen = old->q.qlen; - unsigned int backlog = old->qstats.backlog; - - qdisc_reset(old); - qdisc_tree_reduce_backlog(old, qlen, backlog); - } + if (old != NULL) + qdisc_tree_flush_backlog(old); sch_tree_unlock(sch); return old; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 651879c1b6558..114b9048ea7e3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1667,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; - unsigned int qlen, backlog; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); - qlen = cl->q->q.qlen; - backlog = cl->q->qstats.backlog; - qdisc_reset(cl->q); - qdisc_tree_reduce_backlog(cl->q, qlen, backlog); + qdisc_purge_queue(cl->q); if (cl->next_alive) cbq_deactivate_class(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 8a181591b0ea8..430df9a55ec4e 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct drr_class, common); } -static void drr_purge_queue(struct drr_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, }; @@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - drr_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index a946a419d7173..d2ab463f22ae8 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -844,16 +844,6 @@ qdisc_peek_len(struct Qdisc *sch) return len; } -static void -hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = 
cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { @@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) - hfsc_purge_queue(sch, parent); + qdisc_purge_queue(parent->qdisc); hfsc_adjust_levels(parent); sch_tree_unlock(sch); @@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); - hfsc_purge_queue(sch, cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); sch_tree_unlock(sch); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ed92836f528a9..2f9883b196e8e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1269,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - if (!cl->level) { - unsigned int qlen = cl->leaf.q->q.qlen; - unsigned int backlog = cl->leaf.q->qstats.backlog; - - qdisc_reset(cl->leaf.q); - qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog); - } + if (!cl->level) + qdisc_purge_queue(cl->leaf.q); /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); @@ -1403,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, classid, NULL); sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->leaf.q->q.qlen; - unsigned int backlog = parent->leaf.q->qstats.backlog; - /* turn parent into inner node */ - qdisc_reset(parent->leaf.q); - qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog); + qdisc_purge_queue(parent->leaf.q); qdisc_put(parent->leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 53c918a113786..35b03ae08e0f1 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, for (i = q->bands; i < q->max_bands; i++) { if (q->queues[i] != &noop_qdisc) { struct Qdisc *child = q->queues[i]; + q->queues[i] = &noop_qdisc; - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); + qdisc_tree_flush_backlog(child); qdisc_put(child); } } @@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); + qdisc_tree_flush_backlog(old); qdisc_put(old); } sch_tree_unlock(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index dfb06d5bfacc3..d519b21535b36 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < oldbands; i++) { - struct Qdisc *child = q->queues[i]; - - qdisc_tree_reduce_backlog(child, child->q.qlen, - child->qstats.backlog); - } + for (i = q->bands; i < oldbands; i++) + qdisc_tree_flush_backlog(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9fbda3ec58618..1589364b54da1 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 
classid) return container_of(clc, struct qfq_class, common); } -static void qfq_purge_queue(struct qfq_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - unsigned int backlog = cl->qdisc->qstats.backlog; - - qdisc_reset(cl->qdisc); - qdisc_tree_reduce_backlog(cl->qdisc, len, backlog); -} - static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, [TCA_QFQ_LMAX] = { .type = NLA_U32 }, @@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - qfq_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 9df9942340eaa..4e8c0abf61945 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, q->flags = ctl->flags; q->limit = ctl->limit; if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); old_child = q->qdisc; q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bab506b01a329..2419fdb759667 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_hash_add(child, true); sch_tree_lock(sch); - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 7f272a9070c57..f71578dbb9e39 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, - q->qdisc->qstats.backlog); + qdisc_tree_flush_backlog(q->qdisc); qdisc_put(q->qdisc); q->qdisc = child; } -- GitLab From 74a1dd86d1739eae2015a3832f62c1d6546893a7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2019 10:24:56 -0700 Subject: [PATCH 0468/1861] PM / wakeup: Use pm_pr_dbg() instead of pr_debug() These prints are useful if we're doing PM suspend debugging. Having them at pr_debug() level means that we need to either enable DEBUG in this file, or compile the kernel with dynamic debug capabilities. Both of these options have drawbacks like custom compilation or opting into all debug statements being included into the kernel image. Given that we already have infrastructure to collect PM debugging information with CONFIG_PM_DEBUG and friends, let's change the pr_debug usage here to be pm_pr_dbg() instead so we can collect the wakeup information in the kernel logs. Signed-off-by: Stephen Boyd Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/wakeup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index bb1ae175fae13..23c243a4c6754 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -804,7 +804,7 @@ void pm_print_active_wakeup_sources(void) srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu(ws, &wakeup_sources, entry) { if (ws->active) { - pr_debug("active wakeup source: %s\n", ws->name); + pm_pr_dbg("active wakeup source: %s\n", ws->name); active = 1; } else if (!active && (!last_activity_ws || @@ -815,7 +815,7 @@ void pm_print_active_wakeup_sources(void) } if (!active && last_activity_ws) - pr_debug("last active wakeup source: %s\n", + pm_pr_dbg("last active wakeup source: %s\n", last_activity_ws->name); srcu_read_unlock(&wakeup_srcu, srcuidx); } @@ -845,7 +845,7 @@ bool pm_wakeup_pending(void) raw_spin_unlock_irqrestore(&events_lock, flags); if (ret) { - pr_debug("Wakeup pending, aborting suspend\n"); + pm_pr_dbg("Wakeup pending, aborting suspend\n"); pm_print_active_wakeup_sources(); } -- GitLab From 3c446e6f96997f2a95bf0037ef463802162d2323 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 29 Mar 2019 12:19:46 +0100 Subject: [PATCH 0469/1861] kcm: switch order of device registration to fix a crash When kcm is loaded while many processes try to create a KCM socket, a crash occurs: BUG: unable to handle kernel NULL pointer dereference at 000000000000000e IP: mutex_lock+0x27/0x40 kernel/locking/mutex.c:240 PGD 8000000016ef2067 P4D 8000000016ef2067 PUD 3d6e9067 PMD 0 Oops: 0002 [#1] SMP KASAN PTI CPU: 0 PID: 7005 Comm: syz-executor.5 Not tainted 4.12.14-396-default #1 SLE15-SP1 (unreleased) RIP: 0010:mutex_lock+0x27/0x40 kernel/locking/mutex.c:240 RSP: 0018:ffff88000d487a00 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 000000000000000e RCX: 1ffff100082b0719 ... CR2: 000000000000000e CR3: 000000004b1bc003 CR4: 0000000000060ef0 Call Trace: kcm_create+0x600/0xbf0 [kcm] __sock_create+0x324/0x750 net/socket.c:1272 ... This is due to a race between sock_create and an unfinished register_pernet_device. kcm_create tries to do "net_generic(net, kcm_net_id)", but kcm_net_id is not initialized yet. So switch the order of the two to close the race. This can be reproduced with multiple processes doing socket(PF_KCM, ...) and one process doing module removal. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reviewed-by: Michal Kubecek Signed-off-by: Jiri Slaby Signed-off-by: David S.
Miller --- net/kcm/kcmsock.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index c5c5ab6c5a1cc..44fdc641710db 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -2054,14 +2054,14 @@ static int __init kcm_init(void) if (err) goto fail; - err = sock_register(&kcm_family_ops); - if (err) - goto sock_register_fail; - err = register_pernet_device(&kcm_net_ops); if (err) goto net_ops_fail; + err = sock_register(&kcm_family_ops); + if (err) + goto sock_register_fail; + err = kcm_proc_init(); if (err) goto proc_init_fail; @@ -2069,12 +2069,12 @@ static int __init kcm_init(void) return 0; proc_init_fail: - unregister_pernet_device(&kcm_net_ops); - -net_ops_fail: sock_unregister(PF_KCM); sock_register_fail: + unregister_pernet_device(&kcm_net_ops); + +net_ops_fail: proto_unregister(&kcm_proto); fail: @@ -2090,8 +2090,8 @@ fail: static void __exit kcm_exit(void) { kcm_proc_exit(); - unregister_pernet_device(&kcm_net_ops); sock_unregister(PF_KCM); + unregister_pernet_device(&kcm_net_ops); proto_unregister(&kcm_proto); destroy_workqueue(kcm_wq); -- GitLab From f7ee799a51ddbcc205ef615fe424fb5084e9e0aa Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 29 Mar 2019 19:04:43 -0700 Subject: [PATCH 0470/1861] nfp: flower: replace CFI with vlan present Replace the vlan CFI bit with a vlan present bit that indicates the presence of a vlan tag. Previously the driver incorrectly assumed that a vlan id of 0 is not matchable, therefore we indicate vlan presence with a vlan present bit. Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities") Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Louis Peens Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/cmsg.h | 2 +- .../net/ethernet/netronome/nfp/flower/match.c | 27 +++++++++---------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 4fcaf11ed56ed..08ed777b86d2c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,7 +26,7 @@ #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) -#define NFP_FLOWER_MASK_VLAN_CFI BIT(12) +#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) #define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index e03c8ef2c28c5..9b8b843d03403 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -30,20 +30,19 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, flow_rule_match_vlan(rule, &match); /* Populate the tci field.
*/ - if (match.key->vlan_id || match.key->vlan_priority) { - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - match.key->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - match.key->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - ext->tci = cpu_to_be16(tmp_tci); - tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, - match.mask->vlan_priority) | - FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, - match.mask->vlan_id) | - NFP_FLOWER_MASK_VLAN_CFI; - msk->tci = cpu_to_be16(tmp_tci); - } + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.key->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.key->vlan_id); + ext->tci = cpu_to_be16(tmp_tci); + + tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT; + tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO, + match.mask->vlan_priority) | + FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID, + match.mask->vlan_id); + msk->tci = cpu_to_be16(tmp_tci); } } -- GitLab From 42cd5484a22f1a1b947e21e2af65fa7dab09d017 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Fri, 29 Mar 2019 19:04:44 -0700 Subject: [PATCH 0471/1861] nfp: flower: remove vlan CFI bit from push vlan action We no longer set CFI when pushing vlan tags, therefore we remove the CFI bit from push vlan. Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads") Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: Louis Peens Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 3 +-- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index eeda4ed98333a..e336f6ee94f5c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -48,8 +48,7 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, tmp_push_vlan_tci = FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, act->vlan.prio) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid) | - NFP_FL_PUSH_VLAN_CFI; + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, act->vlan.vid); push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 08ed777b86d2c..0ed51e79db00e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -82,7 +82,6 @@ #define NFP_FL_OUT_FLAGS_TYPE_IDX GENMASK(2, 0) #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) -#define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) #define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) -- GitLab From 09279e615c81ce55e04835970601ae286e3facbe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 31 Mar 2019 16:58:15 +0800 Subject: [PATCH 0472/1861] sctp: initialize _pad of sockaddr_in before copying to user memory Syzbot reported a kernel-infoleak: BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 Call Trace: _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 copy_to_user include/linux/uaccess.h:174 [inline] sctp_getsockopt_peer_addrs net/sctp/socket.c:5911 [inline] sctp_getsockopt+0x1668e/0x17f70 net/sctp/socket.c:7562 ...
Uninit was stored to memory at: sctp_transport_init net/sctp/transport.c:61 [inline] sctp_transport_new+0x16d/0x9a0 net/sctp/transport.c:115 sctp_assoc_add_peer+0x532/0x1f70 net/sctp/associola.c:637 sctp_process_param net/sctp/sm_make_chunk.c:2548 [inline] sctp_process_init+0x1a1b/0x3ed0 net/sctp/sm_make_chunk.c:2361 ... Bytes 8-15 of 16 are uninitialized It was caused by the _pad field (bytes 8-15) of a v4 addr (saved in struct sockaddr_in) not being initialized before being copied directly to user memory in sctp_getsockopt_peer_addrs(). So fix it by calling memset(addr->v4.sin_zero, 0, 8) to initialize _pad of sockaddr_in before copying it to user memory in sctp_v4_addr_to_user(), as sctp_v6_addr_to_user() does. Reported-by: syzbot+86b5c7c236a22616a72f@syzkaller.appspotmail.com Signed-off-by: Xin Long Tested-by: Alexander Potapenko Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6abc8b2742707..951afdeea5e92 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -600,6 +600,7 @@ out: static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } -- GitLab From 1d3ff0950e2b40dc861b1739029649d03f591820 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 1 Apr 2019 09:35:54 +0800 Subject: [PATCH 0473/1861] dccp: Fix memleak in __feat_register_sp If dccp_feat_push_change fails, we forget to free the memory allocated by kmemdup in dccp_feat_clone_sp_val. Reported-by: Hulk Robot Fixes: e8ef967a54f4 ("dccp: Registration routines for changing feature values") Reviewed-by: Mukesh Ojha Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/dccp/feat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index f227f002c73d3..db87d9f580198 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** -- GitLab From 2d85978341e6a32e7443d9f28639da254d53f400 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Mar 2019 17:31:30 +0300 Subject: [PATCH 0474/1861] drm/mediatek: Fix an error code in mtk_hdmi_dt_parse_pdata() We don't want to overwrite "ret"; it already holds the correct error code. The "regmap" variable might be a valid pointer at this point.
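To make the failure mode concrete, here is a minimal sketch of the corrected pattern; the lookup call is an assumption used for illustration, and only the error handling mirrors the driver:

	regmap = syscon_regmap_lookup_by_phandle(np, "mediatek,syscon-hdmi"); /* assumed lookup */
	if (IS_ERR(regmap))
		ret = PTR_ERR(regmap);	/* the error code is captured once, here */
	if (ret) {
		/* a second PTR_ERR(regmap) here is the bug being removed:
		 * regmap may be a valid pointer by now, which would clobber ret
		 */
		dev_err(dev, "Failed to get system configuration registers: %d\n", ret);
		return ret;
	}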
Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Signed-off-by: Dan Carpenter Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 915cc84621aea..543a25e5765e4 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1480,7 +1480,6 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, if (IS_ERR(regmap)) ret = PTR_ERR(regmap); if (ret) { - ret = PTR_ERR(regmap); dev_err(dev, "Failed to get system configuration registers: %d\n", ret); -- GitLab From 20bb907f7dc82ecc9e135ad7067ac7eb69c81222 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sat, 23 Feb 2019 12:47:54 +0100 Subject: [PATCH 0475/1861] mfd: twl-core: Disable IRQ while suspended Since commit 6e2bd956936 ("i2c: omap: Use noirq system sleep pm ops to idle device for suspend") on gta04 we have handle_twl4030_pih() called in situations where pm_runtime_get() in i2c-omap.c returns -EACCES. [ 86.474365] Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done. [ 86.485473] printk: Suspending console(s) (use no_console_suspend to debug) [ 86.555572] Disabling non-boot CPUs ... [ 86.555664] Successfully put all powerdomains to target state [ 86.563720] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563751] twl4030: I2C error -13 reading PIH ISR [ 86.563812] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563812] twl4030: I2C error -13 reading PIH ISR [ 86.563873] twl: Read failed (mod 1, reg 0x01 count 1) [ 86.563903] twl4030: I2C error -13 reading PIH ISR This happens when we wake up via something behind the twl4030 (power button or rtc alarm). This goes on for minutes until the system is finally resumed. Disable the irq on suspend and enable it on resume to avoid having i2c access problems when the irq registers are checked. Fixes: 6e2bd956936 ("i2c: omap: Use noirq system sleep pm ops to idle device for suspend") Signed-off-by: Andreas Kemnade Tested-by: Tony Lindgren Signed-off-by: Lee Jones --- drivers/mfd/twl-core.c | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 299016bc46d90..104477b512a29 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -1245,6 +1245,28 @@ free: return status; } +static int __maybe_unused twl_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + disable_irq(client->irq); + + return 0; +} + +static int __maybe_unused twl_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (client->irq) + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(twl_dev_pm_ops, twl_suspend, twl_resume); + static const struct i2c_device_id twl_ids[] = { { "twl4030", TWL4030_VAUX2 }, /* "Triton 2" */ { "twl5030", 0 }, /* T2 updated */ @@ -1262,6 +1284,7 @@ static const struct i2c_device_id twl_ids[] = { /* One Client Driver , 4 Clients */ static struct i2c_driver twl_driver = { .driver.name = DRIVER_NAME, + .driver.pm = &twl_dev_pm_ops, .id_table = twl_ids, .probe = twl_probe, .remove = twl_remove, -- GitLab From 1d71670e5e09680202c975c2332bcd2b64e8091f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 18 Mar 2019 11:26:51 +0800 Subject: [PATCH 0476/1861] mfd: sc27xx: Use SoC compatible string for PMIC devices We should use the SoC compatible string instead of the wildcard string for PMIC child devices.
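For background, a simplified sketch of why these strings must match, modeled on the MFD core's OF handling (details trimmed): when a cell is added, the core walks the parent's DT children and attaches the first node whose compatible equals the cell's of_compatible:

	for_each_child_of_node(parent->of_node, np) {
		if (of_device_is_compatible(np, cell->of_compatible)) {
			/* with the wildcard "sprd,sc27xx-..." strings removed from
			 * the DTS, only the SoC-specific "sprd,sc2731-..." strings
			 * can match here
			 */
			pdev->dev.of_node = np;
			break;
		}
	}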
Fixes: 0419a75b1808 (arm64: dts: sprd: Remove wildcard compatible string) Signed-off-by: Baolin Wang Signed-off-by: Lee Jones --- drivers/mfd/sprd-sc27xx-spi.c | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index 69df27769c213..43ac71691fe47 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -53,67 +53,67 @@ static const struct sprd_pmic_data sc2731_data = { static const struct mfd_cell sprd_pmic_devs[] = { { .name = "sc27xx-wdt", - .of_compatible = "sprd,sc27xx-wdt", + .of_compatible = "sprd,sc2731-wdt", }, { .name = "sc27xx-rtc", - .of_compatible = "sprd,sc27xx-rtc", + .of_compatible = "sprd,sc2731-rtc", }, { .name = "sc27xx-charger", - .of_compatible = "sprd,sc27xx-charger", + .of_compatible = "sprd,sc2731-charger", }, { .name = "sc27xx-chg-timer", - .of_compatible = "sprd,sc27xx-chg-timer", + .of_compatible = "sprd,sc2731-chg-timer", }, { .name = "sc27xx-fast-chg", - .of_compatible = "sprd,sc27xx-fast-chg", + .of_compatible = "sprd,sc2731-fast-chg", }, { .name = "sc27xx-chg-wdt", - .of_compatible = "sprd,sc27xx-chg-wdt", + .of_compatible = "sprd,sc2731-chg-wdt", }, { .name = "sc27xx-typec", - .of_compatible = "sprd,sc27xx-typec", + .of_compatible = "sprd,sc2731-typec", }, { .name = "sc27xx-flash", - .of_compatible = "sprd,sc27xx-flash", + .of_compatible = "sprd,sc2731-flash", }, { .name = "sc27xx-eic", - .of_compatible = "sprd,sc27xx-eic", + .of_compatible = "sprd,sc2731-eic", }, { .name = "sc27xx-efuse", - .of_compatible = "sprd,sc27xx-efuse", + .of_compatible = "sprd,sc2731-efuse", }, { .name = "sc27xx-thermal", - .of_compatible = "sprd,sc27xx-thermal", + .of_compatible = "sprd,sc2731-thermal", }, { .name = "sc27xx-adc", - .of_compatible = "sprd,sc27xx-adc", + .of_compatible = "sprd,sc2731-adc", }, { .name = "sc27xx-audio-codec", - .of_compatible = "sprd,sc27xx-audio-codec", + .of_compatible = "sprd,sc2731-audio-codec", }, { .name = "sc27xx-regulator", - .of_compatible = "sprd,sc27xx-regulator", + .of_compatible = "sprd,sc2731-regulator", }, { .name = "sc27xx-vibrator", - .of_compatible = "sprd,sc27xx-vibrator", + .of_compatible = "sprd,sc2731-vibrator", }, { .name = "sc27xx-keypad-led", - .of_compatible = "sprd,sc27xx-keypad-led", + .of_compatible = "sprd,sc2731-keypad-led", }, { .name = "sc27xx-bltc", - .of_compatible = "sprd,sc27xx-bltc", + .of_compatible = "sprd,sc2731-bltc", }, { .name = "sc27xx-fgu", - .of_compatible = "sprd,sc27xx-fgu", + .of_compatible = "sprd,sc2731-fgu", }, { .name = "sc27xx-7sreset", - .of_compatible = "sprd,sc27xx-7sreset", + .of_compatible = "sprd,sc2731-7sreset", }, { .name = "sc27xx-poweroff", - .of_compatible = "sprd,sc27xx-poweroff", + .of_compatible = "sprd,sc2731-poweroff", }, { .name = "sc27xx-syscon", - .of_compatible = "sprd,sc27xx-syscon", + .of_compatible = "sprd,sc2731-syscon", }, }; -- GitLab From 6246f283d5e02ac757bd8d9bacde8fdc54c4582d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 1 Apr 2019 15:03:54 +0200 Subject: [PATCH 0477/1861] ASoC: dpcm: skip missing substream while applying symmetry If for any reason, the backend does not have the requested substream (like capture on a playback only backend), the BE will be skipped in dpcm_be_dai_startup(). However, dpcm_apply_symmetry() does not skip those BE and will dereference the be_substream (NULL) pointer anyway. Like in dpcm_be_dai_startup(), just skip those BE. 
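For reference, a simplified sketch of why be_substream can be NULL, modeled on the helper in soc-pcm.c: it simply returns the backend PCM's substream for the requested direction, and a playback-only backend never creates the capture one:

	static struct snd_pcm_substream *
	snd_soc_dpcm_get_substream(struct snd_soc_pcm_runtime *be, int stream)
	{
		return be->pcm->streams[stream].substream; /* NULL if the direction is absent */
	}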
Fixes: 906c7d690c3b ("ASoC: dpcm: Apply symmetry for DPCM") Signed-off-by: Jerome Brunet Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 7fe5321000e87..2d5d5cac4ba60 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1911,10 +1911,15 @@ static int dpcm_apply_symmetry(struct snd_pcm_substream *fe_substream, struct snd_soc_pcm_runtime *be = dpcm->be; struct snd_pcm_substream *be_substream = snd_soc_dpcm_get_substream(be, stream); - struct snd_soc_pcm_runtime *rtd = be_substream->private_data; + struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; int i; + /* A backend may not have the requested substream */ + if (!be_substream) + continue; + + rtd = be_substream->private_data; if (rtd->dai_link->be_hw_params_fixup) continue; -- GitLab From 6e3bfcff191ec9476ca5ef9b2ad85a15ba829374 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 1 Mar 2019 19:08:53 -0600 Subject: [PATCH 0478/1861] ASoC: dapm: set power_check callback for widgets that shouldn't be always on Currently, buffers, schedulers, src's, encoders, decoders and effect type dapm widgets remain always on as their power_check method is not set. Setting this callback allows these widgets in the audio path to be power managed properly. Signed-off-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 67b032ca1601f..0382a47b30bd8 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3650,6 +3650,13 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, case snd_soc_dapm_dac: case snd_soc_dapm_aif_in: case snd_soc_dapm_pga: + case snd_soc_dapm_buffer: + case snd_soc_dapm_scheduler: + case snd_soc_dapm_effect: + case snd_soc_dapm_src: + case snd_soc_dapm_asrc: + case snd_soc_dapm_encoder: + case snd_soc_dapm_decoder: case snd_soc_dapm_out_drv: case snd_soc_dapm_micbias: case snd_soc_dapm_line: -- GitLab From 8742dc86d0c7a9628117a989c11f04a9b6b898f3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 26 Feb 2019 07:04:50 +0100 Subject: [PATCH 0479/1861] xfrm4: Fix uninitialized memory read in _decode_session4 We currently don't reload pointers pointing into the skb header after doing pskb_may_pull() in _decode_session4(). So in case pskb_may_pull() changed the pointers, we read from random memory. Fix this by putting all the needed info on the stack, so that we don't need to access the header pointers after doing pskb_may_pull(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_policy.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d73a6d6652f60..2b144b92ae46a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -111,7 +111,8 @@ static void _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + int ihl = iph->ihl; + u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; @@ -122,6 +123,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_mark = skb->mark; fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ?
iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: @@ -133,7 +139,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; @@ -146,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -159,7 +165,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -171,7 +177,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -183,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -196,7 +202,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) __be16 *greflags; __be32 *gre_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; greflags = (__be16 *)xprth; gre_hdr = (__be32 *)xprth; @@ -213,10 +219,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, -- GitLab From ea5c7eba216e832906e594799b8670f1954a588c Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 1 Apr 2019 11:25:05 +0800 Subject: [PATCH 0480/1861] ALSA: hda/realtek: Enable headset MIC of Acer TravelMate B114-21 with ALC233 The Acer TravelMate B114-21 laptop cannot detect and record sound from headset MIC. This patch adds the ALC233_FIXUP_ACER_HEADSET_MIC HDA verb quirk chained with ALC233_FIXUP_ASUS_MIC_NO_PRESENCE pin quirk to fix this issue. 
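For context, the two verbs in the fixup below form the usual two-step COEF write and should be equivalent to the existing helper in patch_realtek.c (shown only as a sketch, not part of the change):

	alc_write_coef_idx(codec, 0x45, 0x5089); /* select COEF index 0x45, then write 0x5089 */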
[ fixed the missing brace and reordered the entry -- tiwai ] Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Reviewed-by: Kailang Yang Cc: <stable@vger.kernel.org> Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a3fb3d4c57309..715ab2342151e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5663,6 +5663,7 @@ enum { ALC233_FIXUP_ASUS_MIC_NO_PRESENCE, ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE, ALC233_FIXUP_LENOVO_MULTI_CODECS, + ALC233_FIXUP_ACER_HEADSET_MIC, ALC294_FIXUP_LENOVO_MIC_LOCATION, ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE, ALC700_FIXUP_INTEL_REFERENCE, @@ -6490,6 +6491,16 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_alc662_fixup_lenovo_dual_codecs, }, + [ALC233_FIXUP_ACER_HEADSET_MIC] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x45 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x5089 }, + { } + }, + .chained = true, + .chain_id = ALC233_FIXUP_ASUS_MIC_NO_PRESENCE + }, [ALC294_FIXUP_LENOVO_MIC_LOCATION] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -6737,6 +6748,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), -- GitLab From b5bdbb6ccd1117896bf4fba6bff75336ca423e8c Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Fri, 29 Mar 2019 15:48:54 -0700 Subject: [PATCH 0481/1861] ALSA: uapi: #include <time.h> in asound.h The uapi header asound.h defines types based on struct timespec. We need to #include <time.h> to get access to the definition of this struct. Previously, we encountered the following error message when building applications with a clang/bionic toolchain: kernel-headers/sound/asound.h:350:19: error: field has incomplete type 'struct timespec' struct timespec trigger_tstamp; ^ The absence of the time.h #include statement does not cause build errors with glibc, because its version of stdlib.h indirectly includes time.h. Signed-off-by: Daniel Mentz Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 404d4b9ffe764..df1153cea0b7e 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -32,6 +32,7 @@ #ifndef __KERNEL__ #include <stdlib.h> +#include <time.h> #endif /* -- GitLab From b5dee3130bb4014511f5d0dd46855ed843e3fdc8 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:41 +0800 Subject: [PATCH 0482/1861] PM / sleep: Refactor filesystems sync to reduce duplication Create a common helper to sync filesystems for system suspend and hibernation. Signed-off-by: Harry Pan Acked-by: Pavel Machek [ rjw: Changelog ] Signed-off-by: Rafael J.
Wysocki --- include/linux/suspend.h | 3 +++ kernel/power/hibernate.c | 5 +---- kernel/power/main.c | 9 +++++++++ kernel/power/suspend.c | 13 +++++-------- kernel/power/user.c | 5 +---- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3f529ad9a9d2e..6b3ea9ea6a9e0 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -425,6 +425,7 @@ void restore_processor_state(void); /* kernel/power/main.c */ extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); +extern void ksys_sync_helper(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void ksys_sync_helper(void) {} + #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index abef759de7c8f..cc105ecd9c07e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -709,9 +708,7 @@ int hibernate(void) goto Exit; } - pr_info("Syncing filesystems ... \n"); - ksys_sync(); - pr_info("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 98e76cad128b8..40472a7c55366 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "power.h" @@ -51,6 +52,14 @@ void unlock_system_sleep(void) } EXPORT_SYMBOL_GPL(unlock_system_sleep); +void ksys_sync_helper(void) +{ + pr_info("Syncing filesystems ... "); + ksys_sync(); + pr_cont("done.\n"); +} +EXPORT_SYMBOL_GPL(ksys_sync_helper); + /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b6103..e39059dea38b4 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -568,13 +567,11 @@ static int enter_state(suspend_state_t state) if (state == PM_SUSPEND_TO_IDLE) s2idle_begin(); -#ifndef CONFIG_SUSPEND_SKIP_SYNC - trace_suspend_resume(TPS("sync_filesystems"), 0, true); - pr_info("Syncing filesystems ... "); - ksys_sync(); - pr_cont("done.\n"); - trace_suspend_resume(TPS("sync_filesystems"), 0, false); -#endif + if (!IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC)) { + trace_suspend_resume(TPS("sync_filesystems"), 0, true); + ksys_sync_helper(); + trace_suspend_resume(TPS("sync_filesystems"), 0, false); + } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); pm_suspend_clear_flags(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 2d8b60a3c86b9..cb24e840a3e6b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -228,9 +227,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (data->frozen) break; - printk("Syncing filesystems ... 
"); - ksys_sync(); - printk("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) -- GitLab From c64546b17bc940643545dd34eac21f51764d633c Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:43 +0800 Subject: [PATCH 0483/1861] PM / sleep: Measure the time of filesystems syncing Measure the filesystems sync time during system sleep more precisely. Among other things, this allows the pr_cont() to be dropped from ksys_sync_helper() and makes automatic system suspend and hibernation profiling somewhat more straightforward. Signed-off-by: Harry Pan [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 40472a7c55366..4f43e724f6eb3 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -54,9 +54,14 @@ EXPORT_SYMBOL_GPL(unlock_system_sleep); void ksys_sync_helper(void) { - pr_info("Syncing filesystems ... "); + ktime_t start; + long elapsed_msecs; + + start = ktime_get(); ksys_sync(); - pr_cont("done.\n"); + elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start)); + pr_info("Filesystems sync: %ld.%03ld seconds\n", + elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC); } EXPORT_SYMBOL_GPL(ksys_sync_helper); -- GitLab From de7b77e5bb9451417ca57f1b6501da654587c048 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 27 Mar 2019 07:00:29 -0500 Subject: [PATCH 0484/1861] cpu/hotplug: Create SMT sysfs interface for all arches Make the /sys/devices/system/cpu/smt/* files available on all arches, so user space has a consistent way to detect whether SMT is enabled. The 'control' file now shows 'notimplemented' for architectures which don't yet have CONFIG_HOTPLUG_SMT. [ tglx: Make notimplemented a real state ] Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Andrea Arcangeli Cc: Waiman Long Cc: Peter Zijlstra Cc: Jiri Kosina Link: https://lkml.kernel.org/r/469c2b98055f2c41e75748e06447d592a64080c9.1553635520.git.jpoimboe@redhat.com --- .../ABI/testing/sysfs-devices-system-cpu | 10 +-- include/linux/cpu.h | 3 +- kernel/cpu.c | 64 +++++++++++-------- 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9605dbd4b5b59..5eea46fefcb2a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -511,10 +511,12 @@ Description: Control Symetric Multi Threading (SMT) control: Read/write interface to control SMT. Possible values: - "on" SMT is enabled - "off" SMT is disabled - "forceoff" SMT is force disabled. Cannot be changed. - "notsupported" SMT is not supported by the CPU + "on" SMT is enabled + "off" SMT is disabled + "forceoff" SMT is force disabled. Cannot be changed. + "notsupported" SMT is not supported by the CPU + "notimplemented" SMT runtime toggling is not + implemented for the architecture If control status is "forceoff" or "notsupported" writes are rejected. 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297a..ae99dde023209 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -175,6 +175,7 @@ enum cpuhp_smt_control { CPU_SMT_DISABLED, CPU_SMT_FORCE_DISABLED, CPU_SMT_NOT_SUPPORTED, + CPU_SMT_NOT_IMPLEMENTED, }; #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) @@ -182,7 +183,7 @@ extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); extern void cpu_smt_check_topology(void); #else -# define cpu_smt_control (CPU_SMT_ENABLED) +# define cpu_smt_control (CPU_SMT_NOT_IMPLEMENTED) static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 6754f3ecfd943..b8bf3f93e39b7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2033,19 +2033,6 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = { #ifdef CONFIG_HOTPLUG_SMT -static const char *smt_states[] = { - [CPU_SMT_ENABLED] = "on", - [CPU_SMT_DISABLED] = "off", - [CPU_SMT_FORCE_DISABLED] = "forceoff", - [CPU_SMT_NOT_SUPPORTED] = "notsupported", -}; - -static ssize_t -show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]); -} - static void cpuhp_offline_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); @@ -2116,9 +2103,10 @@ static int cpuhp_smt_enable(void) return ret; } + static ssize_t -store_smt_control(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +__store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { int ctrlval, ret; @@ -2156,14 +2144,44 @@ store_smt_control(struct device *dev, struct device_attribute *attr, unlock_device_hotplug(); return ret ? 
ret : count; } + +#else /* !CONFIG_HOTPLUG_SMT */ +static ssize_t +__store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return -ENODEV; +} +#endif /* CONFIG_HOTPLUG_SMT */ + +static const char *smt_states[] = { + [CPU_SMT_ENABLED] = "on", + [CPU_SMT_DISABLED] = "off", + [CPU_SMT_FORCE_DISABLED] = "forceoff", + [CPU_SMT_NOT_SUPPORTED] = "notsupported", + [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", +}; + +static ssize_t +show_smt_control(struct device *dev, struct device_attribute *attr, char *buf) +{ + const char *state = smt_states[cpu_smt_control]; + + return snprintf(buf, PAGE_SIZE - 2, "%s\n", state); +} + +static ssize_t +store_smt_control(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + return __store_smt_control(dev, attr, buf, count); +} static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control); static ssize_t show_smt_active(struct device *dev, struct device_attribute *attr, char *buf) { - bool active = topology_max_smt_threads() > 1; - - return snprintf(buf, PAGE_SIZE - 2, "%d\n", active); + return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active()); } static DEVICE_ATTR(active, 0444, show_smt_active, NULL); @@ -2179,21 +2197,17 @@ static const struct attribute_group cpuhp_smt_attr_group = { NULL }; -static int __init cpu_smt_state_init(void) +static int __init cpu_smt_sysfs_init(void) { return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group); } -#else -static inline int cpu_smt_state_init(void) { return 0; } -#endif - static int __init cpuhp_sysfs_init(void) { int cpu, ret; - ret = cpu_smt_state_init(); + ret = cpu_smt_sysfs_init(); if (ret) return ret; @@ -2214,7 +2228,7 @@ static int __init cpuhp_sysfs_init(void) return 0; } device_initcall(cpuhp_sysfs_init); -#endif +#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */ /* * cpu_bit_bitmap[] is a special, "compressed" data structure that -- GitLab From b9a1ff504b9492ad6beb7d5606e0e3365d4d8499 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 1 Apr 2019 21:40:36 +0800 Subject: [PATCH 0485/1861] block: use blk_free_flush_queue() to free hctx->fq in blk_mq_init_hctx kfree() can leak the hctx->fq->flush_rq field. Reviewed-by: Ming Lei Signed-off-by: Shenghui Wang Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ff3d7b499697..f3b0d33bdf882 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2332,7 +2332,7 @@ static int blk_mq_init_hctx(struct request_queue *q, return 0; free_fq: - kfree(hctx->fq); + blk_free_flush_queue(hctx->fq); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); -- GitLab From ff3b74b8e1675c802e09157a56c97ca38a659b9d Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 26 Mar 2019 21:19:25 +0800 Subject: [PATCH 0486/1861] blk-mq: add trace block plug and unplug for multiple queues For now, we only trace plug for single-queue devices or drivers that provide .commit_rqs, and do not trace plug for multi-queue devices. However, unplug events are still recorded when blk_mq_flush_plug_list() is called. The trace events are therefore asymmetrical: unplug appears without a matching plug. This patch adds plug and unplug tracing for multi-queue devices in blk_mq_make_request(). After that, we can accurately trace plug and unplug for multiple queues.
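As a usage note, assuming the standard blktrace/blkparse tooling, the now-paired events can be observed with something like:

	blktrace -d /dev/nvme0n1 -o - | blkparse -i - | grep -w -e P -e U

where P and U mark the plug and unplug actions (the device name is illustrative).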
Reviewed-by: Christoph Hellwig Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index f3b0d33bdf882..22074a1e37cd4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2003,11 +2003,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug->rq_count--; } blk_add_rq_to_plug(plug, rq); + trace_block_plug(q); blk_mq_put_ctx(data.ctx); if (same_queue_rq) { data.hctx = same_queue_rq->mq_hctx; + trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie, false, true); } -- GitLab From fad9fab975cb9fae651854c811cb07a30bc2b98a Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 2 Apr 2019 17:40:56 +0200 Subject: [PATCH 0487/1861] EDAC/altera, firmware/intel: Add Stratix10 ECC DBE SMC call Reserve ECC Double Bit Error SMC call to alert U-Boot that a DBE has occurred. Move the call from local EDAC header file to a common header. [ bp: Merge the two patches. ] Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Reviewed-by: Richard Gong Reviewed-by: Alan Tull # firmware Cc: Greg KH Cc: James Morse Cc: linux-edac Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1553870639-23895-1-git-send-email-thor.thayer@linux.intel.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 1 + drivers/edac/altera_edac.h | 83 -------------------- include/linux/firmware/intel/stratix10-smc.h | 19 +++++ 3 files changed, 20 insertions(+), 83 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 761199175c763..8816f74a22b4a 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 1532ec9c3510f..55654cc4bcdf3 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -372,87 +372,4 @@ struct altr_arria10_edac { struct notifier_block panic_notifier; }; -/* - * Functions specified by ARM SMC Calling convention: - * - * FAST call executes atomic operations, returns when the requested operation - * has completed. - * STD call starts a operation which can be preempted by a non-secure - * interrupt. The call can return before the requested operation has - * completed. - * - * a0..a7 is used as register names in the descriptions below, on arm32 - * that translates to r0..r7 and on arm64 to w0..w7. - */ - -#define INTEL_SIP_SMC_STD_CALL_VAL(func_num) \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_SIP, (func_num)) - -#define INTEL_SIP_SMC_FAST_CALL_VAL(func_num) \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_SIP, (func_num)) - -#define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF -#define INTEL_SIP_SMC_STATUS_OK 0x0 -#define INTEL_SIP_SMC_REG_ERROR 0x5 - -/* - * Request INTEL_SIP_SMC_REG_READ - * - * Read a protected register using SMCCC - * - * Call register usage: - * a0: INTEL_SIP_SMC_REG_READ. - * a1: register address. - * a2-7: not used. - * - * Return status: - * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION - * a1: Value in the register - * a2-3: not used. 
- */ -#define INTEL_SIP_SMC_FUNCID_REG_READ 7 -#define INTEL_SIP_SMC_REG_READ \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ) - -/* - * Request INTEL_SIP_SMC_REG_WRITE - * - * Write a protected register using SMCCC - * - * Call register usage: - * a0: INTEL_SIP_SMC_REG_WRITE. - * a1: register address - * a2: value to program into register. - * a3-7: not used. - * - * Return status: - * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION - * a1-3: not used. - */ -#define INTEL_SIP_SMC_FUNCID_REG_WRITE 8 -#define INTEL_SIP_SMC_REG_WRITE \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) - -/* - * Request INTEL_SIP_SMC_ECC_DBE - * - * Sync call used by service driver at EL1 alert EL3 that a Double Bit - * ECC error has occurred. - * - * Call register usage: - * a0 INTEL_SIP_SMC_ECC_DBE - * a1 SysManager Double Bit Error value - * a2-7 not used - * - * Return status - * a0 INTEL_SIP_SMC_STATUS_OK - */ -#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13 -#define INTEL_SIP_SMC_ECC_DBE \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) - #endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index 5be5dab50b132..01684d935580e 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -309,4 +309,23 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_FUNCID_RSU_UPDATE 12 #define INTEL_SIP_SMC_RSU_UPDATE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_UPDATE) + +/* + * Request INTEL_SIP_SMC_ECC_DBE + * + * Sync call used by service driver at EL1 to alert EL3 that a Double + * Bit ECC error has occurred. + * + * Call register usage: + * a0 INTEL_SIP_SMC_ECC_DBE + * a1 SysManager Double Bit Error value + * a2-7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK + */ +#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13 +#define INTEL_SIP_SMC_ECC_DBE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) + #endif -- GitLab From 01b76c32e3f30d54ab8ec1efeed3c6ecef7f6027 Mon Sep 17 00:00:00 2001 From: Bo YU Date: Thu, 28 Mar 2019 03:47:37 -0400 Subject: [PATCH 0488/1861] misc: fastrpc: add checked value for dma_set_mask The return value from dma_set_mask should be checked so that we can log some info if setting the dma mask fails.
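As a related aside, a minimal sketch of an alternative many drivers use, which sets the streaming and coherent masks together (whether the coherent mask is needed for this device is an assumption):

	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
	if (rc) {
		dev_err(dev, "32-bit DMA enable failed\n");
		return rc;
	}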
Detected by CoverityScan, CID# 1443983: Error handling issues (CHECKED_RETURN) Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Signed-off-by: Bo YU Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 39f832d272889..36d0d5c9cfbad 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1184,6 +1184,7 @@ static int fastrpc_cb_probe(struct platform_device *pdev) struct fastrpc_session_ctx *sess; struct device *dev = &pdev->dev; int i, sessions = 0; + int rc; cctx = dev_get_drvdata(dev->parent); if (!cctx) @@ -1213,7 +1214,11 @@ static int fastrpc_cb_probe(struct platform_device *pdev) } cctx->sesscount++; spin_unlock(&cctx->lock); - dma_set_mask(dev, DMA_BIT_MASK(32)); + rc = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(dev, "32-bit DMA enable failed\n"); + return rc; + } return 0; } -- GitLab From 131ac62253dba79daf4a6d83ab12293d2b9863d3 Mon Sep 17 00:00:00 2001 From: Christian Gromm Date: Tue, 2 Apr 2019 13:39:57 +0200 Subject: [PATCH 0489/1861] staging: most: core: use device description as name This patch uses the device description to clearly identify a device attached to the bus. It is needed because the currently used mdevX notation is not sufficient when more than one network interface controller is being used at the same time. Cc: stable@vger.kernel.org Signed-off-by: Christian Gromm Signed-off-by: Greg Kroah-Hartman --- drivers/staging/most/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c index 18936cdb10830..956daf8c3bd24 100644 --- a/drivers/staging/most/core.c +++ b/drivers/staging/most/core.c @@ -1431,7 +1431,7 @@ int most_register_interface(struct most_interface *iface) INIT_LIST_HEAD(&iface->p->channel_list); iface->p->dev_id = id; - snprintf(iface->p->name, STRING_SIZE, "mdev%d", id); + strcpy(iface->p->name, iface->description); iface->dev.init_name = iface->p->name; iface->dev.bus = &mc.bus; iface->dev.parent = &mc.dev; -- GitLab From 99bd5fcc505d65ea9c60619202f0b2d926eabbe9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 21 Mar 2019 17:19:37 -0700 Subject: [PATCH 0490/1861] ARC: PAE40: don't panic and instead turn off hw ioc HSDK currently panics when built for HIGHMEM/ARC_HAS_PAE40 because ioc is enabled by default, which doesn't work for the 2 non-contiguous memory nodes. So get PAE working by disabling ioc instead. Tested with !PAE40 by forcing @ioc_enable=0 and running the glibc testsuite over ssh. Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 4135abec3fb09..63e6e65046992 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c) + if (cbcr.c) { ioc_exists = 1; - else + + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM.
+ * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled()) + ioc_enable = 0; + } else { ioc_enable = 0; + } /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void) if (!ioc_enable) return; - /* - * As for today we don't support both IOC and ZONE_HIGHMEM enabled - * simultaneously. This happens because as of today IOC aperture covers - * only ZONE_NORMAL (low mem) and any dma transactions outside this - * region won't be HW coherent. - * If we want to use both IOC and ZONE_HIGHMEM we can use - * bounce_buffer to handle dma transactions to HIGHMEM. - * Also it is possible to modify dma_direct cache ops or increase IOC - * aperture size if we are planning to use HIGHMEM without PAE. - */ - if (IS_ENABLED(CONFIG_HIGHMEM)) - panic("IOC and HIGHMEM can't be used simultaneously"); - /* Flush + invalidate + disable L1 dcache */ __dc_disable(); -- GitLab From 21cee1bd1594b2af6798ddffa97555b4bc3586e1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 2 Apr 2019 12:10:44 -0700 Subject: [PATCH 0491/1861] ARC: [hsdk] Make it easier to add PAE40 region to DTB 1. Bump top level address-cells/size-cells nodes to 2 (to ensure all down stream addresses are 64-bits, unless explicitly specified otherwise (in "soc" bus with all peripherals) 2. "memory" also specified with address/size 2 3. Add a commented reference for PAE40 region beyond 4GB physical address space Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 69bc1c9e8e50d..7425bb0f2d1b6 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -18,8 +18,8 @@ model = "snps,hsdk"; compatible = "snps,hsdk"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; chosen { bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; @@ -105,7 +105,7 @@ #size-cells = <1>; interrupt-parent = <&idu_intc>; - ranges = <0x00000000 0xf0000000 0x10000000>; + ranges = <0x00000000 0x0 0xf0000000 0x10000000>; cgu_rst: reset-controller@8a0 { compatible = "snps,hsdk-reset"; @@ -269,9 +269,10 @@ }; memory@80000000 { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; device_type = "memory"; - reg = <0x80000000 0x40000000>; /* 1 GiB */ + reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */ + /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */ }; }; -- GitLab From b2e54b09a3d29c4db883b920274ca8dca4d9f04d Mon Sep 17 00:00:00 2001 From: Sheena Mira-ato Date: Mon, 1 Apr 2019 13:04:42 +1300 Subject: [PATCH 0492/1861] ip6_tunnel: Match to ARPHRD_TUNNEL6 for dev type The device type for ip6 tunnels is set to ARPHRD_TUNNEL6. However, the ip4ip6_err function is expecting the device type of the tunnel to be ARPHRD_TUNNEL. Since the device types do not match, the function exits and the ICMP error packet is not sent to the originating host. Note that the device type for IPv4 tunnels is set to ARPHRD_TUNNEL. Fix is to expect a tunnel device type of ARPHRD_TUNNEL6 instead. Now the tunnel device type matches and the ICMP error packet is sent to the originating host. Signed-off-by: Sheena Mira-ato Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0c6403cf8b522..ade1390c63488 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -627,7 +627,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->daddr, eiph->saddr, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); - if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -636,7 +636,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } -- GitLab From d939f44d4a7f910755165458da20407d2139f581 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 1 Apr 2019 18:08:30 +0800 Subject: [PATCH 0493/1861] drm/amdgpu: remove unnecessary rlc reset function on gfx9 The rlc reset function is not necessary during the gfx9 initialization/resume phase, and it can even cause RLC firmware loading to fail on some gfx9 ASICs. Removing this function is safe; this has been verified on Vega/Raven platforms. Signed-off-by: Le Ma Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d0309e8c9d12c..a11db2b1a63f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2405,8 +2405,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); - adev->gfx.rlc.funcs->reset(adev); - gfx_v9_0_init_pg(adev); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { -- GitLab From 882c5e552ffd06856de42261460f46e18319d259 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 2 Apr 2019 12:26:36 +0200 Subject: [PATCH 0494/1861] rtc: da9063: set uie_unsupported when relevant The DA9063AD doesn't support alarms with second precision; its granularity is the minute. Set uie_unsupported in that case. Reported-by: Wolfram Sang Reported-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Acked-by: Steve Twiss Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-da9063.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index b4e054c64bad9..69b54e5556c06 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -480,6 +480,13 @@ static int da9063_rtc_probe(struct platform_device *pdev) da9063_data_to_tm(data, &rtc->alarm_time, rtc); rtc->rtc_sync = false; + /* + * TODO: some models have alarms on a minute boundary but still support + * real hardware interrupts. Add this once the core supports it.
+ */ + if (config->rtc_data_start != RTC_SEC) + rtc->rtc_dev->uie_unsupported = 1; + irq_alarm = platform_get_irq_byname(pdev, "ALARM"); ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL, da9063_alarm_event, -- GitLab From 96085b949672dca19773495813b577eb3bedf06e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Apr 2019 06:36:23 +0100 Subject: [PATCH 0495/1861] KVM: arm/arm64: vgic-v3: Retire pending interrupts on disabling LPIs When disabling LPIs (for example on reset) at the redistributor level, it is expected that LPIs that was pending in the CPU interface are eventually retired. Currently, this is not what is happening, and these LPIs will stay in the ap_list, eventually being acknowledged by the vcpu (which didn't quite expect this behaviour). The fix is thus to retire these LPIs from the list of pending interrupts as we disable LPIs. Reported-by: Heyi Guo Tested-by: Heyi Guo Fixes: 0e4e82f154e3 ("KVM: arm64: vgic-its: Enable ITS emulation as a virtual MSI controller") Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 3 +++ virt/kvm/arm/vgic/vgic.c | 21 +++++++++++++++++++++ virt/kvm/arm/vgic/vgic.h | 1 + 3 files changed, 25 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 4a12322bf7df8..9f4843fe9cda6 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -200,6 +200,9 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; + if (was_enabled && !vgic_cpu->lpis_enabled) + vgic_flush_pending_lpis(vcpu); + if (!was_enabled && vgic_cpu->lpis_enabled) vgic_enable_lpis(vcpu); } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 3af69f2a38667..191deccf60bf9 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -151,6 +151,27 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) kfree(irq); } +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + unsigned long flags; + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + if (irq->intid >= VGIC_MIN_LPI) { + raw_spin_lock(&irq->irq_lock); + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); +} + void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) { WARN_ON(irq_set_irqchip_state(irq->host_irq, diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index a90024718ca44..abeeffabc456c 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -238,6 +238,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu); int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, -- GitLab From 43d147be5738a9ed6cfb25c285ac50d6dd5793be Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 2 Apr 2019 13:49:14 +0100 Subject: [PATCH 0496/1861] ASoC: wm_adsp: Check for buffer in trigger stop Trigger stop can be called in situations where trigger start failed and as such it can't be assumed the buffer is 
already attached to the compressed stream or a NULL pointer may be dereferenced. Fixes: 639e5eb3c7d6 ("ASoC: wm_adsp: Correct handling of compressed streams that restart") Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 5608ed5deccac..b0b48eb9c7c91 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -3587,7 +3587,8 @@ int wm_adsp_compr_trigger(struct snd_compr_stream *stream, int cmd) } break; case SNDRV_PCM_TRIGGER_STOP: - wm_adsp_buffer_clear(compr->buf); + if (wm_adsp_compr_attached(compr)) + wm_adsp_buffer_clear(compr->buf); break; default: ret = -EINVAL; -- GitLab From 9f3bd8fe8f9d39e27e29c7134b21e335d1c7db6c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 2 Apr 2019 13:18:45 -0400 Subject: [PATCH 0497/1861] Update Nicolas Pitre's email address The @linaro version won't be valid much longer. Signed-off-by: Nicolas Pitre Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index b2cde8668dcc3..ae2bcad06f4b5 100644 --- a/.mailmap +++ b/.mailmap @@ -156,6 +156,8 @@ Morten Welinder Morten Welinder Mythri P K Nguyen Anh Quynh +Nicolas Pitre +Nicolas Pitre Paolo 'Blaisorblade' Giarrusso Patrick Mochel Paul Burton diff --git a/MAINTAINERS b/MAINTAINERS index 43b36dbed48e7..391405091c6b3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4129,7 +4129,7 @@ F: drivers/cpuidle/* F: include/linux/cpuidle.h CRAMFS FILESYSTEM -M: Nicolas Pitre +M: Nicolas Pitre S: Maintained F: Documentation/filesystems/cramfs.txt F: fs/cramfs/ -- GitLab From 4bcdec39c454c4e8f9512115bdcc3efec1ba5f55 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 2 Apr 2019 12:20:49 +0200 Subject: [PATCH 0498/1861] ASoC: Intel: cht_bsw_max98090_ti: Enable codec clock once and keep it enabled Users have been seeing sound stability issues with max98090 codecs since: commit 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") At first that commit broke sound for Chromebook Swanky and Clapper models, the problem was that the machine-driver has been controlling the wrong clock on those models since support for them was added. This was hidden by clk-pmc-atom.c keeping the actual clk on unconditionally. With the machine-driver controlling the proper clock, sound works again but we are seeing bug reports describing it as: low volume, "sounds like played at 10x speed" and instable. When these issues are hit the following message is seen in dmesg: "max98090 i2c-193C9890:00: PLL unlocked". Attempts have been made to fix this by inserting a delay between enabling the clk and enabling and checking the pll, but this has not helped. It seems that at least on boards which use pmc_plt_clk_0 as clock, if we ever disable the clk, the pll looses its lock and after that we get various issues. This commit fixes this by enabling the clock once at probe time on these boards. In essence this restores the old behavior of clk-pmc-atom.c always keeping the clk on on these boards. 
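For illustration, the enable-once pattern looks roughly like this (a hedged sketch with placeholder names; the actual machine-driver code is in the diff below):

#include <linux/clk.h>

/* Sketch: keep the codec MCLK running for the whole lifetime of the card. */
static int example_mclk_on_probe(struct clk *mclk)
{
	/* enabled exactly once here; paired with the disable at remove */
	return clk_prepare_enable(mclk);
}

static void example_mclk_on_remove(struct clk *mclk)
{
	clk_disable_unprepare(mclk);	/* balances the probe-time enable */
}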
Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Reported-by: Mogens Jensen Reported-by: Dean Wallace Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/boards/cht_bsw_max98090_ti.c | 47 +++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c index 3263b0495853c..c0e0844f75b9f 100644 --- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c +++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c @@ -43,6 +43,7 @@ struct cht_mc_private { struct clk *mclk; struct snd_soc_jack jack; bool ts3a227e_present; + int quirks; }; static int platform_clock_control(struct snd_soc_dapm_widget *w, @@ -54,6 +55,10 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card); int ret; + /* See the comment in snd_cht_mc_probe() */ + if (ctx->quirks & QUIRK_PMC_PLT_CLK_0) + return 0; + codec_dai = snd_soc_card_get_codec_dai(card, CHT_CODEC_DAI); if (!codec_dai) { dev_err(card->dev, "Codec dai not found; Unable to set platform clock\n"); @@ -223,6 +228,10 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime) "jack detection gpios not added, error %d\n", ret); } + /* See the comment in snd_cht_mc_probe() */ + if (ctx->quirks & QUIRK_PMC_PLT_CLK_0) + return 0; + /* * The firmware might enable the clock at * boot (this information may or may not @@ -423,16 +432,15 @@ static int snd_cht_mc_probe(struct platform_device *pdev) const char *mclk_name; struct snd_soc_acpi_mach *mach; const char *platform_name; - int quirks = 0; - - dmi_id = dmi_first_match(cht_max98090_quirk_table); - if (dmi_id) - quirks = (unsigned long)dmi_id->driver_data; drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL); if (!drv) return -ENOMEM; + dmi_id = dmi_first_match(cht_max98090_quirk_table); + if (dmi_id) + drv->quirks = (unsigned long)dmi_id->driver_data; + drv->ts3a227e_present = acpi_dev_found("104C227E"); if (!drv->ts3a227e_present) { /* no need probe TI jack detection chip */ @@ -458,7 +466,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) snd_soc_card_cht.dev = &pdev->dev; snd_soc_card_set_drvdata(&snd_soc_card_cht, drv); - if (quirks & QUIRK_PMC_PLT_CLK_0) + if (drv->quirks & QUIRK_PMC_PLT_CLK_0) mclk_name = "pmc_plt_clk_0"; else mclk_name = "pmc_plt_clk_3"; @@ -471,6 +479,21 @@ static int snd_cht_mc_probe(struct platform_device *pdev) return PTR_ERR(drv->mclk); } + /* + * Boards which have the MAX98090's clk connected to clk_0 do not seem + * to like it if we muck with the clock. If we disable the clock when + * it is unused we get "max98090 i2c-193C9890:00: PLL unlocked" errors + * and the PLL never seems to lock again. + * So for these boards we enable it here once and leave it at that. 
+ */ + if (drv->quirks & QUIRK_PMC_PLT_CLK_0) { + ret_val = clk_prepare_enable(drv->mclk); + if (ret_val < 0) { + dev_err(&pdev->dev, "MCLK enable error: %d\n", ret_val); + return ret_val; + } + } + ret_val = devm_snd_soc_register_card(&pdev->dev, &snd_soc_card_cht); if (ret_val) { dev_err(&pdev->dev, @@ -481,11 +504,23 @@ static int snd_cht_mc_probe(struct platform_device *pdev) return ret_val; } +static int snd_cht_mc_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); + struct cht_mc_private *ctx = snd_soc_card_get_drvdata(card); + + if (ctx->quirks & QUIRK_PMC_PLT_CLK_0) + clk_disable_unprepare(ctx->mclk); + + return 0; +} + static struct platform_driver snd_cht_mc_driver = { .driver = { .name = "cht-bsw-max98090", }, .probe = snd_cht_mc_probe, + .remove = snd_cht_mc_remove, }; module_platform_driver(snd_cht_mc_driver) -- GitLab From 1a81542abfdae21716a1a32ea8472de7a271dde6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 09:47:18 +0100 Subject: [PATCH 0499/1861] perf/x86/intel: Simplify intel_tfa_commit_scheduling() validate_group() calls x86_schedule_events(.assign=NULL) and therefore will not call intel_tfa_commit_scheduling(). So there is no point in checking cpuc->is_fake, we'll never get there. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8baa441d8000f..dee281d58476b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2015,7 +2015,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int /* * We're going to use PMC3, make sure TFA is set before we touch it. */ - if (cntr == 3 && !cpuc->is_fake) + if (cntr == 3) intel_set_tfa(cpuc, true); } -- GitLab From 21d65555cd878c8d6ff2b2902e6c16b2ca73f33d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 09:57:57 +0100 Subject: [PATCH 0500/1861] perf/x86: Simplify x86_pmu.get_constraints() interface There is a special case for validate_events() where we'll call x86_pmu.get_constraints(.idx=-1). Its purpose, until recently, seems to have been to avoid taking a previous constraint from cpuc->event_constraint[] in intel_get_event_constraints(). (I could not find any other get_event_constraints() implementation using @idx) However, since that cpuc is freshly allocated, that array will in fact be initialized with NULL pointers, achieving the very same effect. Therefore remove this exception.
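The equivalence can be sketched in a few lines (simplified, hypothetical types; not the kernel code):

struct event_constraint;

struct fake_cpuc {
	/* kzalloc'ed in the validate path, so every slot starts out NULL */
	struct event_constraint *event_constraint[64];
};

/*
 * Old code: "if (idx >= 0) c1 = cpuc->event_constraint[idx];", with
 * validate_event() passing idx == -1 to force c1 = NULL. On a freshly
 * zeroed cpuc, reading slot 0 yields NULL as well, so idx == 0 gives
 * the very same result without the special case.
 */
static struct event_constraint *
prev_constraint(struct fake_cpuc *cpuc, int idx)
{
	return cpuc->event_constraint[idx];	/* NULL on a fresh cpuc */
}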
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- arch/x86/events/intel/core.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e2b1447192a88..508bf5355a5ab 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2031,7 +2031,7 @@ static int validate_event(struct perf_event *event) if (IS_ERR(fake_cpuc)) return PTR_ERR(fake_cpuc); - c = x86_pmu.get_event_constraints(fake_cpuc, -1, event); + c = x86_pmu.get_event_constraints(fake_cpuc, 0, event); if (!c || !c->weight) ret = -EINVAL; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dee281d58476b..879ab540fd057 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2931,11 +2931,9 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = NULL; - struct event_constraint *c2; + struct event_constraint *c1, *c2; - if (idx >= 0) /* fake does < 0 */ - c1 = cpuc->event_constraint[idx]; + c1 = cpuc->event_constraint[idx]; /* * first time only @@ -3410,7 +3408,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, /* * Without TFA we must not use PMC3. */ - if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) { + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { c = dyn_constraint(cpuc, c, idx); c->idxmsk64 &= ~(1ULL << 3); c->weight--; -- GitLab From 1f6a1e2d7d7135280125418b25d35aebd6fa0952 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 12:58:52 +0100 Subject: [PATCH 0501/1861] perf/x86: Remove PERF_X86_EVENT_COMMITTED The flag PERF_X86_EVENT_COMMITTED is used to find uncommitted events for which to call put_event_constraint() when scheduling fails. These are the newly added events to the list, and must form, per definition, the tail of cpuc->event_list[]. By computing the list index of the last successfull schedule, then iteration can start there and the flag is redundant. There are only 3 callers of x86_schedule_events(), notably: - x86_pmu_add() - x86_pmu_commit_txn() - validate_group() For x86_pmu_add(), cpuc->n_events isn't updated until after schedule_events() succeeds, therefore cpuc->n_events points to the desired index. For x86_pmu_commit_txn(), cpuc->n_events is updated, but we can trivially compute the desired value with cpuc->n_txn -- the number of events added in this transaction. For validate_group(), we can make the rule for x86_pmu_add() work by simply setting cpuc->n_events to 0 before calling schedule_events(). 
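The index rule condenses to something like this (a simplified sketch, not the kernel code):

#include <stdbool.h>

/*
 * Sketch: index of the first uncommitted event. For x86_pmu_add() and
 * validate_group(), n_events is not yet updated and already marks the
 * start of the new events; for commit_txn() it was updated, so back
 * off by the number of events added in this transaction.
 */
static int first_new_event(int n_events, int n_txn, bool in_txn_add)
{
	return in_txn_add ? n_events - n_txn : n_events;
}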
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 28 +++++++++++++--------------- arch/x86/events/perf_event.h | 19 +++++++++---------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 508bf5355a5ab..9cab5adfa87af 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -925,19 +925,23 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!unsched && assign) { for (i = 0; i < n; i++) { e = cpuc->event_list[i]; - e->hw.flags |= PERF_X86_EVENT_COMMITTED; if (x86_pmu.commit_scheduling) x86_pmu.commit_scheduling(cpuc, i, assign[i]); } } else { - for (i = 0; i < n; i++) { + /* + * Compute the number of events already present; see + * x86_pmu_add(), validate_group() and x86_pmu_commit_txn(). + * For the former two cpuc->n_events hasn't been updated yet, + * while for the latter cpuc->n_txn contains the number of + * events added in the current transaction. + */ + i = cpuc->n_events; + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) + i -= cpuc->n_txn; + + for (; i < n; i++) { e = cpuc->event_list[i]; - /* - * do not put_constraint() on comitted events, - * because they are good to go - */ - if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) - continue; /* * release events that failed scheduling @@ -1371,11 +1375,6 @@ static void x86_pmu_del(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; - /* - * event is descheduled - */ - event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; - /* * If we're called during a txn, we only need to undo x86_pmu.add. 
* The events never got scheduled and ->cancel_txn will truncate @@ -2079,8 +2078,7 @@ static int validate_group(struct perf_event *event) if (n < 0) goto out; - fake_cpuc->n_events = n; - + fake_cpuc->n_events = 0; ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); out: diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a75955741c504..b4d41829da4f2 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -55,22 +55,21 @@ struct event_constraint { int overlap; int flags; }; + /* * struct hw_perf_event.flags flags */ #define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ #define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ -#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */ -#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */ -#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */ -#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ -#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ -#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ -#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ -#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ - +#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */ +#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */ +#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */ +#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */ +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0080 /* grant rdpmc permission */ +#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */ +#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ +#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ struct amd_nb { int nb_id; /* NorthBridge id */ -- GitLab From c090cb70c6152fc97ce91242789d7c322109f1a1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 13:01:14 +0100 Subject: [PATCH 0502/1861] perf/x86/intel: Optimize intel_get_excl_constraints() Avoid the POPCNT by noting we can decrement the weight for each cleared bit. 
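A stand-alone toy version of the change (plain C with a GCC builtin; not the kernel code):

#include <stdint.h>
#include <stdio.h>

/* Old approach: clear bits, then recount the whole mask (a POPCNT). */
static int weight_by_recount(uint64_t mask)
{
	return __builtin_popcountll(mask);
}

/* New approach: start from the known weight, decrement per cleared bit. */
static int clear_bit_adjust_weight(uint64_t *mask, int w, unsigned int bit)
{
	if (*mask & (1ULL << bit)) {
		*mask &= ~(1ULL << bit);
		w--;	/* no full recount needed */
	}
	return w;
}

int main(void)
{
	uint64_t mask = 0xf;	/* weight 4 */
	int w = weight_by_recount(mask);

	w = clear_bit_adjust_weight(&mask, w, 3);
	printf("mask=%#llx weight=%d\n", (unsigned long long)mask, w);
	return 0;	/* prints mask=0x7 weight=3 */
}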
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 879ab540fd057..a80015fa8aeda 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2838,7 +2838,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; struct intel_excl_states *xlo; int tid = cpuc->excl_thread_id; - int is_excl, i; + int is_excl, i, w; /* * validating a group does not require @@ -2894,36 +2894,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * SHARED : sibling counter measuring non-exclusive event * UNUSED : sibling counter unused */ + w = c->weight; for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { /* * exclusive event in sibling counter * our corresponding counter cannot be used * regardless of our event */ - if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) + if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) { __clear_bit(i, c->idxmsk); + w--; + continue; + } /* * if measuring an exclusive event, sibling * measuring non-exclusive, then counter cannot * be used */ - if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) + if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) { __clear_bit(i, c->idxmsk); + w--; + continue; + } } - /* - * recompute actual bit weight for scheduling algorithm - */ - c->weight = hweight64(c->idxmsk64); - /* * if we return an empty mask, then switch * back to static empty constraint to avoid * the cost of freeing later on */ - if (c->weight == 0) + if (!w) c = &emptyconstraint; + c->weight = w; + return c; } -- GitLab From 2c9651c38d1789d179c8d50724d037152ba85e56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 13:03:00 +0100 Subject: [PATCH 0503/1861] perf/x86: Clear ->event_constraint[] on put The current code unconditionally clears cpuc->event_constraint[i] before calling get_event_constraints(.idx=i). The only site that cares is intel_get_event_constraints() where the c1 load will always be NULL. However, always calling get_event_constraints() on all events is wastefull, most times it will return the exact same result. Therefore retain the logic in intel_get_event_constraints() and change the generic code to only clear the constraint on put. 
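In sketch form, the clear moves from the scheduling path to the put path (simplified, hypothetical helpers; the actual reuse of the cached value lands in the follow-up patch below):

struct event_constraint;

/* get side: a cached constraint can now survive across schedules */
static struct event_constraint *
get_constraint_cached(struct event_constraint **slot,
		      struct event_constraint *(*get_fresh)(void))
{
	if (!*slot)
		*slot = get_fresh();	/* usually returns the same result */
	return *slot;
}

/* put side: the cache is dropped only when the event is released */
static void put_constraint_cached(struct event_constraint **slot)
{
	*slot = NULL;
}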
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 9cab5adfa87af..279530111d8ef 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -858,7 +858,6 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - cpuc->event_constraint[i] = NULL; c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); cpuc->event_constraint[i] = c; @@ -948,6 +947,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, e); + + cpuc->event_constraint[i] = NULL; } } @@ -1411,6 +1412,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; } + cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; perf_event_update_userpage(event); -- GitLab From 109717de57b95d6dd9582c18a74f154d69fea536 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 13:17:51 +0100 Subject: [PATCH 0504/1861] perf/x86: Optimize x86_schedule_events() Now that cpuc->event_constraint[] is retained, we can avoid calling get_event_constraints() over and over again. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 13 +++++++++++-- arch/x86/events/intel/core.c | 3 ++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 279530111d8ef..fa88acf0daadf 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -858,8 +858,17 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); - cpuc->event_constraint[i] = c; + c = cpuc->event_constraint[i]; + + /* + * Request constraints for new events; or for those events that + * have a dynamic constraint -- for those the constraint can + * change due to external factors (sibling state, allow_tfa). 
+ */ + if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) { + c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); + cpuc->event_constraint[i] = c; + } wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a80015fa8aeda..62394e1b2390e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2945,7 +2945,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * - dynamic constraint: handled by intel_get_excl_constraints() */ c2 = __intel_get_event_constraints(cpuc, idx, event); - if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { + if (c1) { + WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC)); bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); c1->weight = c2->weight; c2 = c1; -- GitLab From f80deefa41890f9484802d7b67f11daf28055150 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Mar 2019 13:25:02 +0100 Subject: [PATCH 0505/1861] perf/x86: Add sanity checks to x86_schedule_events() By computing the 'committed' index earlier, we can use it to validate the cached constraint state. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index fa88acf0daadf..24dab9ad4fd66 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -849,17 +849,34 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) struct event_constraint *c; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct perf_event *e; - int i, wmin, wmax, unsched = 0; + int n0, i, wmin, wmax, unsched = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); + /* + * Compute the number of events already present; see x86_pmu_add(), + * validate_group() and x86_pmu_commit_txn(). For the former two + * cpuc->n_events hasn't been updated yet, while for the latter + * cpuc->n_txn contains the number of events added in the current + * transaction. + */ + n0 = cpuc->n_events; + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) + n0 -= cpuc->n_txn; + if (x86_pmu.start_scheduling) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { c = cpuc->event_constraint[i]; + /* + * Previously scheduled events should have a cached constraint, + * while new events should not have one. + */ + WARN_ON_ONCE((c && i >= n0) || (!c && i < n0)); + /* * Request constraints for new events; or for those events that * have a dynamic constraint -- for those the constraint can @@ -937,18 +954,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) x86_pmu.commit_scheduling(cpuc, i, assign[i]); } } else { - /* - * Compute the number of events already present; see - * x86_pmu_add(), validate_group() and x86_pmu_commit_txn(). - * For the former two cpuc->n_events hasn't been updated yet, - * while for the latter cpuc->n_txn contains the number of - * events added in the current transaction. 
- */ - i = cpuc->n_events; - if (cpuc->txn_flags & PERF_PMU_TXN_ADD) - i -= cpuc->n_txn; - - for (; i < n; i++) { + for (i = n0; i < n; i++) { e = cpuc->event_list[i]; /* -- GitLab From 6690e86be83ac75832e461c141055b5d601c0a6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Feb 2019 10:30:52 +0100 Subject: [PATCH 0506/1861] sched/x86: Save [ER]FLAGS on context switch Effectively reverts commit: 2c7577a75837 ("sched/x86_64: Don't save flags on context switch") Specifically because SMAP uses FLAGS.AC which invalidates the claim that the kernel has clean flags. In particular; while preemption from interrupt return is fine (the IRET frame on the exception stack contains FLAGS) it breaks any code that does synchonous scheduling, including preempt_enable(). This has become a significant issue ever since commit: 5b24a7a2aa20 ("Add 'unsafe' user access functions for batched accesses") provided for means of having 'normal' C code between STAC / CLAC, exposing the FLAGS.AC state. So far this hasn't led to trouble, however fix it before it comes apart. Reported-by: Julien Thierry Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@kernel.org Fixes: 5b24a7a2aa20 ("Add 'unsafe' user access functions for batched accesses") Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 2 ++ arch/x86/entry/entry_64.S | 2 ++ arch/x86/include/asm/switch_to.h | 1 + arch/x86/kernel/process_32.c | 7 +++++++ arch/x86/kernel/process_64.c | 8 ++++++++ 5 files changed, 20 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d309f30cf7af8..5fc76b755510a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -650,6 +650,7 @@ ENTRY(__switch_to_asm) pushl %ebx pushl %edi pushl %esi + pushfl /* switch stack */ movl %esp, TASK_threadsp(%eax) @@ -672,6 +673,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfl popl %esi popl %edi popl %ebx diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b6294..4fe27b67d7e24 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -291,6 +291,7 @@ ENTRY(__switch_to_asm) pushq %r13 pushq %r14 pushq %r15 + pushfq /* switch stack */ movq %rsp, TASK_threadsp(%rdi) @@ -313,6 +314,7 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ + popfq popq %r15 popq %r14 popq %r13 diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 7cf1a270d8910..157149d4129c0 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -40,6 +40,7 @@ asmlinkage void ret_from_fork(void); * order of the fields must match the code in __switch_to_asm(). */ struct inactive_task_frame { + unsigned long flags; #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index e471d8e6f0b24..70933193878ca 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -127,6 +127,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct task_struct *tsk; int err; + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. 
+ */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6a62f4af9fcf7..026a43be9bd1f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -392,6 +392,14 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; + + /* + * For a new task use the RESET flags value since there is no before. + * All the status flags are zero; DF and all the system flags must also + * be 0, specifically IF must be 0 because we context switch to the new + * task with interrupts disabled. + */ + frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; -- GitLab From 37686b1353cfc30e127cef811959cdbcd0495d98 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 7 Mar 2019 11:48:02 -0600 Subject: [PATCH 0507/1861] tracing: Improve "if" macro code generation With CONFIG_PROFILE_ALL_BRANCHES=y, the "if" macro converts the conditional to an array index. This can cause GCC to create horrible code. When there are nested ifs, the generated code uses register values to encode branching decisions. Make it easier for GCC to optimize by keeping the conditional as a conditional rather than converting it to an integer. This shrinks the generated code quite a bit, and also makes the code sane enough for objtool to understand. Reported-by: Peter Zijlstra Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Cc: brgerst@gmail.com Cc: catalin.marinas@arm.com Cc: dvlasenk@redhat.com Cc: dvyukov@google.com Cc: hpa@zytor.com Cc: james.morse@arm.com Cc: julien.thierry@arm.com Cc: luto@amacapital.net Cc: luto@kernel.org Cc: rostedt@goodmis.org Cc: valentin.schneider@arm.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190307174802.46fmpysxyo35hh43@treble Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 445348facea97..d58aa0db05f94 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, .line = __LINE__, \ }; \ ______r = !!(cond); \ - ______f.miss_hit[______r]++; \ + ______r ? ______f.miss_hit[1]++ : ______f.miss_hit[0]++;\ ______r; \ })) #endif /* CONFIG_PROFILE_ALL_BRANCHES */ -- GitLab From 67a0514afdbb8b2fc70b771b8c77661a9cb9d3a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2019 12:56:35 +0100 Subject: [PATCH 0508/1861] x86/ia32: Fix ia32_restore_sigcontext() AC leak Objtool spotted that we call native_load_gs_index() with AC set. Re-arrange the code to avoid that. 
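The shape of the fix, as a generic sketch of the pattern (hedged; placeholder names, and using the plain unsafe_get_user() interface rather than the get_user_try/catch helpers the patch actually touches): do only user-memory fetches between STAC and CLAC, and reload the segment registers after the window is closed.

#include <linux/uaccess.h>

static int fetch_segments_sketch(const unsigned int __user *usc,
				 unsigned int *gs, unsigned int *fs)
{
	if (!user_access_begin(usc, 2 * sizeof(*usc)))
		return -EFAULT;
	unsafe_get_user(*gs, &usc[0], Efault);	/* reads only, AC=1 */
	unsafe_get_user(*fs, &usc[1], Efault);
	user_access_end();

	/* segment register reloads now run with AC clear again */
	return 0;

Efault:
	user_access_end();
	return -EFAULT;
}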
Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 321fe5f5d0e96..4d5fcd47ab75a 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -61,9 +61,8 @@ } while (0) #define RELOAD_SEG(seg) { \ - unsigned int pre = GET_SEG(seg); \ + unsigned int pre = (seg) | 3; \ unsigned int cur = get_user_seg(seg); \ - pre |= 3; \ if (pre != cur) \ set_user_seg(seg, pre); \ } @@ -72,6 +71,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_32 __user *sc) { unsigned int tmpflags, err = 0; + u16 gs, fs, es, ds; void __user *buf; u32 tmp; @@ -79,16 +79,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, current->restart_block.fn = do_no_restart_syscall; get_user_try { - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ - RELOAD_SEG(gs); - RELOAD_SEG(fs); - RELOAD_SEG(ds); - RELOAD_SEG(es); + gs = GET_SEG(gs); + fs = GET_SEG(fs); + ds = GET_SEG(ds); + es = GET_SEG(es); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); COPY(ax); @@ -106,6 +100,17 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, buf = compat_ptr(tmp); } get_user_catch(err); + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + RELOAD_SEG(gs); + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); + err |= fpu__restore_sig(buf, 1); force_iret(); -- GitLab From 8f4faed01e3015955801c8ef066ec7fd7a8b3902 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Feb 2019 13:52:31 +0100 Subject: [PATCH 0509/1861] i915, uaccess: Fix redundant CLAC New tooling noticed this: drivers/gpu/drm/i915/i915_gem_execbuffer.o: warning: objtool: .altinstr_replacement+0x3c: redundant UACCESS disable drivers/gpu/drm/i915/i915_gem_execbuffer.o: warning: objtool: .altinstr_replacement+0x66: redundant UACCESS disable You don't need user_access_end() if user_access_begin() fails. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Chris Wilson Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 02adcaf6ebea6..16f80a4488206 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1667,6 +1667,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb) len)) { end_user: user_access_end(); +end: kvfree(relocs); err = -EFAULT; goto err; @@ -1686,7 +1687,7 @@ end_user: * relocations were valid. */ if (!user_access_begin(urelocs, size)) - goto end_user; + goto end; for (copied = 0; copied < nreloc; copied++) unsafe_put_user(-1, @@ -2695,7 +2696,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, * when we did the "copy_from_user()" above. 
*/ if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list))) - goto end_user; + goto end; for (i = 0; i < args->buffer_count; i++) { if (!(exec2_list[i].offset & UPDATE)) @@ -2709,6 +2710,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, } end_user: user_access_end(); +end:; } args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; -- GitLab From 3693ca81151eacd498675baae56abede577e8b31 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 Mar 2019 15:24:33 +0100 Subject: [PATCH 0510/1861] x86/uaccess: Move copy_user_handle_tail() into asm By writing the function in asm we avoid cross object code flow and objtool no longer gets confused about a 'stray' CLAC. Also; the asm version is actually _simpler_. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 24 ---------------- arch/x86/include/asm/uaccess_64.h | 3 -- arch/x86/lib/copy_user_64.S | 48 +++++++++++++++++++++++++++++++ arch/x86/lib/usercopy_64.c | 20 ------------- 4 files changed, 48 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 6467757bb39f6..3ff577c0b1024 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -148,30 +148,6 @@ _ASM_PTR (entry); \ .popsection -.macro ALIGN_DESTINATION - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jz 102f /* already aligned */ - subl $8,%ecx - negl %ecx - subl %ecx,%edx -100: movb (%rsi),%al -101: movb %al,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz 100b -102: - .section .fixup,"ax" -103: addl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - _ASM_EXTABLE_UA(100b, 103b) - _ASM_EXTABLE_UA(101b, 103b) - .endm - #else # define _EXPAND_EXTABLE_HANDLE(x) #x # define _ASM_EXTABLE_HANDLE(from, to, handler) \ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a9d637bc301d7..5cd1caa8bc653 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -207,9 +207,6 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) return __copy_user_flushcache(dst, src, size); } -unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len); - unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index db4e5aa0858b9..b2f1822084aee 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,6 +16,30 @@ #include #include +.macro ALIGN_DESTINATION + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + _ASM_EXTABLE_UA(100b, 103b) + _ASM_EXTABLE_UA(101b, 103b) + .endm + /* * copy_user_generic_unrolled - memory copy with exception handling. * This version is for CPUs like P4 that don't have efficient micro @@ -193,6 +217,30 @@ ENTRY(copy_user_enhanced_fast_string) ENDPROC(copy_user_enhanced_fast_string) EXPORT_SYMBOL(copy_user_enhanced_fast_string) +/* + * Try to copy last bytes and clear the rest if needed. 
+ * Since protection fault in copy_from/to_user is not a normal situation, + * it is not necessary to optimize tail handling. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. + */ +ALIGN; +copy_user_handle_tail: + movl %edx,%ecx +1: rep movsb +2: mov %ecx,%eax + ASM_CLAC + ret + + _ASM_EXTABLE_UA(1b, 2b) +ENDPROC(copy_user_handle_tail) + /* * copy_user_nocache - Uncached memory copy with exception handling * This will force destination out of cache for more performance. diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index ee42bb0cbeb3f..9952a01cad249 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -54,26 +54,6 @@ unsigned long clear_user(void __user *to, unsigned long n) } EXPORT_SYMBOL(clear_user); -/* - * Try to copy last bytes and clear the rest if needed. - * Since protection fault in copy_from/to_user is not a normal situation, - * it is not necessary to optimize tail handling. - */ -__visible unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len) -{ - for (; len; --len, to++) { - char c; - - if (__get_user_nocheck(c, from++, sizeof(char))) - break; - if (__put_user_nocheck(c, to, sizeof(char))) - break; - } - clac(); - return len; -} - /* * Similar to copy_user_handle_tail, probe for the write fault point, * but reuse __memcpy_mcsafe in case a new read error is encountered. -- GitLab From a05a2e7998ab1badcf80aed47b5313934fd131fa Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 22 Mar 2019 10:16:50 +0100 Subject: [PATCH 0511/1861] mfd: sun6i-prcm: Allow to compile with COMPILE_TEST Since this driver only has a dependency on ARCH_SUNXI just because it doesn't make any sense to run it on something else, we can definitely enable it through COMPILE_TEST as well to get some build coverage. Signed-off-by: Maxime Ripard Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 0ce2d8dfc5f1a..26ad6468d13a7 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1246,7 +1246,7 @@ config MFD_STA2X11 config MFD_SUN6I_PRCM bool "Allwinner A31 PRCM controller" - depends on ARCH_SUNXI + depends on ARCH_SUNXI || COMPILE_TEST select MFD_CORE help Support for the PRCM (Power/Reset/Clock Management) unit available -- GitLab From b69656fa7ea2f75e47d7bd5b9430359fa46488af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Apr 2019 09:39:45 +0200 Subject: [PATCH 0512/1861] x86/uaccess: Fix up the fixup New tooling got confused about this: arch/x86/lib/memcpy_64.o: warning: objtool: .fixup+0x7: return with UACCESS enabled While the code isn't wrong, it is tedious (if at all possible) to figure out what function a particular chunk of .fixup belongs to. This then confuses the objtool uaccess validation. Instead of returning directly from the .fixup, jump back into the right function. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 3b24dc05251c7..9d05572370edc 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -257,6 +257,7 @@ ENTRY(__memcpy_mcsafe) /* Copy successful. 
Return zero */ .L_done_memcpy_trap: xorl %eax, %eax +.L_done: ret ENDPROC(__memcpy_mcsafe) EXPORT_SYMBOL_GPL(__memcpy_mcsafe) @@ -273,7 +274,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) addl %edx, %ecx .E_trailing_bytes: mov %ecx, %eax - ret + jmp .L_done /* * For write fault handling, given the destination is unaligned, -- GitLab From ff05ab2305aaeb21a3002ae95a17e176c198b71b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Mar 2019 14:33:07 +0100 Subject: [PATCH 0513/1861] x86/nospec, objtool: Introduce ANNOTATE_IGNORE_ALTERNATIVE To facillitate other usage of ignoring alternatives; rename ANNOTATE_NOSPEC_IGNORE to ANNOTATE_IGNORE_ALTERNATIVE. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative-asm.h | 11 ++++++++++ arch/x86/include/asm/alternative.h | 10 +++++++++ arch/x86/include/asm/nospec-branch.h | 28 +++++++++----------------- tools/objtool/check.c | 8 ++++---- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 31b627b43a8e0..464034db299f7 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -19,6 +19,17 @@ .endm #endif +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +.macro ANNOTATE_IGNORE_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.ignore_alts + .long .Lannotate_\@ - . + .popsection +.endm + /* * Issue one struct alt_instr descriptor entry (need to put it into * the section .altinstructions, see below). This entry contains diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4c74073a19ccd..094fbc9c0b1c0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -45,6 +45,16 @@ #define LOCK_PREFIX "" #endif +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.ignore_alts\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dad12b767ba06..daf25b60c9e3a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -10,6 +10,15 @@ #include #include +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + ANNOTATE_IGNORE_ALTERNATIVE + /* * Fill the CPU return stack buffer. * @@ -56,19 +65,6 @@ #ifdef __ASSEMBLY__ -/* - * This should be used immediately before a retpoline alternative. It tells - * objtool where the retpolines are so that it can make sense of the control - * flow by just reading the original instruction(s) and ignoring the - * alternatives. - */ -.macro ANNOTATE_NOSPEC_ALTERNATIVE - .Lannotate_\@: - .pushsection .discard.nospec - .long .Lannotate_\@ - . - .popsection -.endm - /* * This should be used immediately before an indirect jump/call. 
It tells * objtool the subsequent indirect jump/call is vouched safe for retpoline @@ -152,12 +148,6 @@ #else /* __ASSEMBLY__ */ -#define ANNOTATE_NOSPEC_ALTERNATIVE \ - "999:\n\t" \ - ".pushsection .discard.nospec\n\t" \ - ".long 999b - .\n\t" \ - ".popsection\n\t" - #define ANNOTATE_RETPOLINE_SAFE \ "999:\n\t" \ ".pushsection .discard.retpoline_safe\n\t" \ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5dde107083c60..110ea3d847724 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -457,13 +457,13 @@ static void add_ignores(struct objtool_file *file) * But it at least allows objtool to understand the control flow *around* the * retpoline. */ -static int add_nospec_ignores(struct objtool_file *file) +static int add_ignore_alternatives(struct objtool_file *file) { struct section *sec; struct rela *rela; struct instruction *insn; - sec = find_section_by_name(file->elf, ".rela.discard.nospec"); + sec = find_section_by_name(file->elf, ".rela.discard.ignore_alts"); if (!sec) return 0; @@ -475,7 +475,7 @@ static int add_nospec_ignores(struct objtool_file *file) insn = find_insn(file, rela->sym->sec, rela->addend); if (!insn) { - WARN("bad .discard.nospec entry"); + WARN("bad .discard.ignore_alts entry"); return -1; } @@ -1239,7 +1239,7 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); - ret = add_nospec_ignores(file); + ret = add_ignore_alternatives(file); if (ret) return ret; -- GitLab From 4fc0f0e9471ec573ab2d44e7f462d996d614536e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Mar 2019 19:09:08 +0100 Subject: [PATCH 0514/1861] x86/uaccess, xen: Suppress SMAP warnings drivers/xen/privcmd.o: warning: objtool: privcmd_ioctl()+0x1414: call to hypercall_page() with UACCESS enabled Some Xen hypercalls allow parameter buffers in user land, so they need to set AC=1. Avoid the warning for those cases. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juergen Gross Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andrew.cooper3@citrix.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/hypercall.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index de6f0d59a24f4..580a909afad4e 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -214,6 +214,22 @@ xen_single_call(unsigned int call, return (long)__res; } +static __always_inline void __xen_stac(void) +{ + /* + * Suppress objtool seeing the STAC/CLAC and getting confused about it + * calling random code with AC=1. 
+ */ + asm volatile(ANNOTATE_IGNORE_ALTERNATIVE + ASM_STAC ::: "memory", "flags"); +} +
+static __always_inline void __xen_clac(void) +{ + asm volatile(ANNOTATE_IGNORE_ALTERNATIVE +
ASM_CLAC ::: "memory", "flags"); +} + static inline long privcmd_call(unsigned int call,
unsigned long a1, unsigned long a2, @@ -222,9 +238,9 @@ privcmd_call(unsigned int call, { long
res; - stac(); + __xen_stac(); res = xen_single_call(call, a1, a2, a3, a4, a5); - clac(); +
__xen_clac(); return res; } @@ -421,9 +437,9 @@ HYPERVISOR_dm_op( domid_t dom, unsigned int
nr_bufs, struct xen_dm_op_buf *bufs) { int ret; - stac(); + __xen_stac(); ret =
_hypercall3(int, dm_op, dom, nr_bufs, bufs); - clac(); + __xen_clac(); return ret; } -- GitLab
From b7f89bfe52cd523a229d6dd22639225b2e3681dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra
Date: Wed, 3 Apr 2019 09:39:47 +0200 Subject: [PATCH 0515/1861] x86/uaccess: Always inline
user_access_begin() If GCC out-of-lines it, the STAC and CLAC are in different functions and
objtool gets upset. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh
Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar
--- arch/x86/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index
1954dd5552a2e..ae5783b5fab01 100644 --- a/arch/x86/include/asm/uaccess.h +++
b/arch/x86/include/asm/uaccess.h @@ -705,7 +705,7 @@ extern struct movsl_mask { * checking
before using them, but you have to surround them with the * user_access_begin/end() pair. */
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len) +static
__must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { if
(unlikely(!access_ok(ptr,len))) return 0; -- GitLab From
88e4718275c1bddca6f61f300688b4553dc8584b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date:
Wed, 3 Apr 2019 09:39:48 +0200 Subject: [PATCH 0516/1861] x86/uaccess, signal: Fix AC=1 bloat
Occasionally GCC is less aggressive with inlining and the following is observed:
arch/x86/kernel/signal.o: warning: objtool: restore_sigcontext()+0x3cc: call to
force_valid_ss.isra.5() with UACCESS enabled arch/x86/kernel/signal.o: warning: objtool:
do_signal()+0x384: call to frame_uc_flags.isra.0() with UACCESS enabled Cure this by moving
this code out of the AC=1 region, since it really isn't needed for the user access.
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Josh
Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar
--- arch/x86/kernel/signal.c | 29 +++++++++++++++++------------ 1 file changed, 17
insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 08dfd4c1a4f95..c8aa58a2bab97 100644 --- a/arch/x86/kernel/signal.c +++
b/arch/x86/kernel/signal.c @@ -132,16 +132,6 @@ static int restore_sigcontext(struct pt_regs
*regs, COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#ifdef CONFIG_X86_64 - /* - * Fix up SS if needed
for the benefit of old DOSEMU and - * CRIU.
- */ - if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && - user_64bit_mode(regs))) -
force_valid_ss(regs); -#endif - get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags &
~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); regs->orig_ax = -1; /* disable syscall checks */ @@
-150,6 +140,15 @@ static int restore_sigcontext(struct pt_regs *regs, buf = (void __user
*)buf_val; } get_user_catch(err); +#ifdef CONFIG_X86_64 + /* + * Fix up SS if needed for the
benefit of old DOSEMU and + * CRIU. + */ + if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
user_64bit_mode(regs))) + force_valid_ss(regs); +#endif + err |= fpu__restore_sig(buf,
IS_ENABLED(CONFIG_X86_32)); force_iret(); @@ -461,6 +460,7 @@ static int __setup_rt_frame(int
sig, struct ksignal *ksig, { struct rt_sigframe __user *frame; void __user *fp = NULL; +
unsigned long uc_flags; int err = 0; frame = get_sigframe(&ksig->ka, regs, sizeof(struct
rt_sigframe), &fp); @@ -473,9 +473,11 @@ static int __setup_rt_frame(int sig, struct ksignal
*ksig, return -EFAULT; } + uc_flags = frame_uc_flags(regs); + put_user_try { /* Create the
ucontext. */ - put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); + put_user_ex(uc_flags,
&frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp); @@ -541,6 +543,7 @@ static int
x32_setup_rt_frame(struct ksignal *ksig, { #ifdef CONFIG_X86_X32_ABI struct rt_sigframe_x32
__user *frame; + unsigned long uc_flags; void __user *restorer; int err = 0; void __user
*fpstate = NULL; @@ -555,9 +558,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
return -EFAULT; } + uc_flags = frame_uc_flags(regs); + put_user_try { /* Create the ucontext.
*/ - put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); + put_user_ex(uc_flags,
&frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); --
GitLab From 0e9f02450da07fc7b1346c8c32c771555173e397 Mon Sep 17 00:00:00 2001 From: Mel Gorman
Date: Tue, 19 Mar 2019 12:36:10 +0000 Subject: [PATCH 0517/1861] sched/fair: Do not re-read
->h_load_next during hierarchical load calculation A NULL pointer dereference bug was reported
on a distribution kernel, but the same issue should be present in the mainline kernel. It
occurred on s390 but should not be arch-specific. A partial oops looks like: Unable to handle
kernel pointer dereference in virtual kernel address space ... Call Trace: ...
try_to_wake_up+0xfc/0x450 vhost_poll_wakeup+0x3a/0x50 [vhost] __wake_up_common+0xbc/0x178
__wake_up_common_lock+0x9e/0x160 __wake_up_sync_key+0x4e/0x60 sock_def_readable+0x5e/0x98 The
bug hits anywhere between 1 hour and 3 days. The dereference occurs in update_cfs_rq_h_load
when accumulating h_load. The problem is that cfs_rq->h_load_next is not protected by any
locking and can be updated by parallel calls to task_h_load. Depending on the compiler, code
may be generated that re-reads cfs_rq->h_load_next after the check for NULL and then oopses
when reading se->avg.load_avg. The disassembly showed that it was possible to reread
h_load_next after the check for NULL. While this does not appear to be an issue for later
compilers, it is still only by accident that the correct code is generated. Full locking in
this path would have a high overhead, so this patch uses READ_ONCE() to read h_load_next only
once and checks it for NULL before dereferencing. It was confirmed that there were no further
oopses after 10 days of testing.
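To make the race concrete, consider this minimal sketch (an editor's illustration, not code
from the patch; the struct and function names are invented):

    #include <linux/compiler.h>

    struct node { struct node *next; long load; };

    /*
     * Plain load: because this is a data race, the compiler may legally
     * re-read p->next after the NULL check, so the value dereferenced
     * below can differ from the value that was checked.
     */
    static long sum_unsafe(struct node *p)
    {
            struct node *n;
            long total = 0;

            while ((n = p->next) != NULL) {
                    total += n->load;       /* 'n' may be re-fetched here */
                    p = n;
            }
            return total;
    }

    /*
     * READ_ONCE() forces exactly one load per iteration, so the value
     * checked against NULL is the value that gets dereferenced.
     */
    static long sum_safe(struct node *p)
    {
            struct node *n;
            long total = 0;

            while ((n = READ_ONCE(p->next)) != NULL) {
                    total += n->load;
                    p = n;
            }
            return total;
    }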
As Peter pointed out, it is also necessary to use WRITE_ONCE() to avoid any potential problems
with store tearing. Signed-off-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Valentin Schneider Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc:
Thomas Gleixner Cc: Fixes: 685207963be9 ("sched: Move h_load calculation to task_h_load()")
Link: https://lkml.kernel.org/r/20190319123610.nsivgf3mjbjjesxb@techsingularity.net
Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 6 +++--- 1 file changed, 3 insertions(+),
3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index
fdab7eb6f3517..40bd1e27b1b79 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@
-7784,10 +7784,10 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) if
(cfs_rq->last_h_load_update == now) return; - cfs_rq->h_load_next = NULL; +
WRITE_ONCE(cfs_rq->h_load_next, NULL); for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); -
cfs_rq->h_load_next = se; + WRITE_ONCE(cfs_rq->h_load_next, se); if
(cfs_rq->last_h_load_update == now) break; } @@ -7797,7 +7797,7 @@ static void
update_cfs_rq_h_load(struct cfs_rq *cfs_rq) cfs_rq->last_h_load_update = now; } - while ((se =
cfs_rq->h_load_next) != NULL) { + while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) { load
= cfs_rq->h_load; load = div64_ul(load * se->avg.load_avg, cfs_rq_load_avg(cfs_rq) + 1); --
GitLab From d18bf4229b1772e91c0c36772737c01cf9726720 Mon Sep 17 00:00:00 2001 From: Valdis
Kletnieks Date: Tue, 12 Mar 2019 04:06:37 -0400 Subject: [PATCH 0518/1861] perf/core: Make
perf_swevent_init_cpu() static 'make W=1' causes GCC to complain:
kernel/events/core.c:11877:6: warning: no previous prototype for 'perf_swevent_init_cpu'
[-Wmissing-prototypes] It's not referenced anywhere else, make it static. Signed-off-by: Valdis
Kletnieks Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc:
Thomas Gleixner Link: https://lkml.kernel.org/r/28974.1552377997@turing-police Signed-off-by:
Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/kernel/events/core.c b/kernel/events/core.c index 72d06e302e993..dfc4bab0b02bc 100644
--- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11878,7 +11878,7 @@ static void
__init perf_event_init_all_cpus(void) } } -void perf_swevent_init_cpu(unsigned int cpu)
+static void perf_swevent_init_cpu(unsigned int cpu) { struct swevent_htable *swhash =
&per_cpu(swevent_htable, cpu); -- GitLab From 2abc330c514fe56c570bb1a6318b054b06a4f72e Mon Sep
17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 2 Apr 2019 23:06:21 +0200 Subject: [PATCH
0519/1861] clk: sunxi-ng: nkmp: Avoid GENMASK(-1, 0) Sometimes one of the nkmp factors is
unused. This means that the shift and width values of that factor are set to 0. The current
nkmp clock code generates a mask for each factor with GENMASK(width + shift - 1, shift). For an
unused factor this translates to GENMASK(-1, 0). The C preprocessor further expands this to the
final version: (((~0UL) - (1UL << (0)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (-1)))) or a bit
simplified: (~0UL & (~0UL >> BITS_PER_LONG)) It turns out that the result of the second part
(~0UL >> BITS_PER_LONG) is actually undefined by the C standard, which clearly specifies: "If
the value of the right operand is negative or is greater than or equal to the width of the
promoted left operand, the behavior is undefined."
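The degenerate case is easy to demonstrate in isolation; as a minimal sketch (an editor's
illustration, not code from the patch):

    #include <linux/bits.h>
    #include <linux/printk.h>

    static void genmask_demo(void)
    {
            unsigned int shift = 0, width = 0;      /* an unused factor */

            /*
             * Degenerates to GENMASK(-1, 0): the expansion ends up shifting
             * ~0UL right by BITS_PER_LONG, which is undefined, so the result
             * depends on the compiler and optimization level.
             */
            pr_info("mask = %#lx\n", GENMASK(width + shift - 1, shift));
    }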
Additionally, compiling the kernel with aarch64-linux-gnu-gcc 8.3.0 gave different results
depending on whether literals or variables holding the same values were used. GENMASK() with
the literals -1 and 0 gives zero; with variables it gives 0xFFFFFFFFFFFFFFFF (~0UL). Because
the nkmp driver calls GENMASK() with variables as parameters, the expression evaluates the mask
as ~0UL instead of 0. A further consequence is that the LSB in the register is always set to 1
(1 is the neutral value for a factor, and the shift is 0). For example, the H6 pll-de clock is
set to 600 MHz by the sun4i-drm driver, but due to this bug it ends up at 300 MHz.
Additionally, 300 MHz seems to be too low, because the following warning can be found in dmesg:
[ 1.752763] WARNING: CPU: 2 PID: 41 at drivers/clk/sunxi-ng/ccu_common.c:41 ccu_helper_wait_for_lock.part.0+0x6c/0x90
[ 1.763378] Modules linked in:
[ 1.766441] CPU: 2 PID: 41 Comm: kworker/2:1 Not tainted 5.1.0-rc2-next-20190401 #138
[ 1.774269] Hardware name: Pine H64 (DT)
[ 1.778200] Workqueue: events deferred_probe_work_func
[ 1.783341] pstate: 40000005 (nZcv daif -PAN -UAO)
[ 1.788135] pc : ccu_helper_wait_for_lock.part.0+0x6c/0x90
[ 1.793623] lr : ccu_helper_wait_for_lock.part.0+0x48/0x90
[ 1.799107] sp : ffff000010f93840
[ 1.802422] x29: ffff000010f93840 x28: 0000000000000000
[ 1.807735] x27: ffff800073ce9d80 x26: ffff000010afd1b8
[ 1.813049] x25: ffffffffffffffff x24: 00000000ffffffff
[ 1.818362] x23: 0000000000000001 x22: ffff000010abd5c8
[ 1.823675] x21: 0000000010000000 x20: 00000000685f367e
[ 1.828987] x19: 0000000000001801 x18: 0000000000000001
[ 1.834300] x17: 0000000000000001 x16: 0000000000000000
[ 1.839613] x15: 0000000000000000 x14: ffff000010789858
[ 1.844926] x13: 0000000000000000 x12: 0000000000000001
[ 1.850239] x11: 0000000000000000 x10: 0000000000000970
[ 1.855551] x9 : ffff000010f936c0 x8 : ffff800074cec0d0
[ 1.860864] x7 : 0000800067117000 x6 : 0000000115c30b41
[ 1.866177] x5 : 00ffffffffffffff x4 : 002c959300bfe500
[ 1.871490] x3 : 0000000000000018 x2 : 0000000029aaaaab
[ 1.876802] x1 : 00000000000002e6 x0 : 00000000686072bc
[ 1.882114] Call trace:
[ 1.884565] ccu_helper_wait_for_lock.part.0+0x6c/0x90
[ 1.889705] ccu_helper_wait_for_lock+0x10/0x20
[ 1.894236] ccu_nkmp_set_rate+0x244/0x2a8
[ 1.898334] clk_change_rate+0x144/0x290
[ 1.902258] clk_core_set_rate_nolock+0x180/0x1b8
[ 1.906963] clk_set_rate+0x34/0xa0
[ 1.910455] sun8i_mixer_bind+0x484/0x558
[ 1.914466] component_bind_all+0x10c/0x230
[ 1.918651] sun4i_drv_bind+0xc4/0x1a0
[ 1.922401] try_to_bring_up_master+0x164/0x1c0
[ 1.926932] __component_add+0xa0/0x168
[ 1.930769] component_add+0x10/0x18
[ 1.934346] sun8i_dw_hdmi_probe+0x18/0x20
[ 1.938443] platform_drv_probe+0x50/0xa0
[ 1.942455] really_probe+0xcc/0x280
[ 1.946032] driver_probe_device+0x54/0xe8
[ 1.950130] __device_attach_driver+0x80/0xb8
[ 1.954488] bus_for_each_drv+0x78/0xc8
[ 1.958326] __device_attach+0xd4/0x130
[ 1.962163] device_initial_probe+0x10/0x18
[ 1.966348] bus_probe_device+0x90/0x98
[ 1.970185] deferred_probe_work_func+0x6c/0xa0
[ 1.974720] process_one_work+0x1e0/0x320
[ 1.978732] worker_thread+0x228/0x428
[ 1.982484] kthread+0x120/0x128
[ 1.985714] ret_from_fork+0x10/0x18
[ 1.989290] ---[ end trace 9babd42e1ca4b84f ]---
This commit solves the issue by first checking the value of the factor width: if it is 0 (an
unused factor), the mask is set to 0; otherwise the GENMASK() macro is used as before.
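For reference, the masks the fix is meant to produce (an editor's sketch with made-up factor
values): an unused factor must contribute an all-zeroes mask so the register write leaves its
bits alone, while a used factor keeps the usual GENMASK() result:

    u32 m_mask = 0;                         /* unused factor: width == 0 */
    u32 n_mask = GENMASK(8 + 8 - 1, 8);     /* width 8, shift 8: 0x0000ff00 */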
Fixes: d897ef56faf9 ("clk: sunxi-ng: Mask nkmp factors when setting register") Signed-off-by:
Jernej Skrabec Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_nkmp.c | 18
+++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git
a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c index
9b49adb20d07c..69dfc6de1c4e6 100644 --- a/drivers/clk/sunxi-ng/ccu_nkmp.c +++
b/drivers/clk/sunxi-ng/ccu_nkmp.c @@ -167,7 +167,7 @@ static int ccu_nkmp_set_rate(struct clk_hw
*hw, unsigned long rate, unsigned long parent_rate) { struct ccu_nkmp *nkmp =
hw_to_ccu_nkmp(hw); - u32 n_mask, k_mask, m_mask, p_mask; + u32 n_mask = 0, k_mask = 0, m_mask =
0, p_mask = 0; struct _ccu_nkmp _nkmp; unsigned long flags; u32 reg; @@ -186,10 +186,18 @@
static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
ccu_nkmp_find_best(parent_rate, rate, &_nkmp); - n_mask = GENMASK(nkmp->n.width + nkmp->n.shift
- 1, nkmp->n.shift); - k_mask = GENMASK(nkmp->k.width + nkmp->k.shift - 1, nkmp->k.shift); -
m_mask = GENMASK(nkmp->m.width + nkmp->m.shift - 1, nkmp->m.shift); - p_mask =
GENMASK(nkmp->p.width + nkmp->p.shift - 1, nkmp->p.shift); + if (nkmp->n.width) + n_mask =
GENMASK(nkmp->n.width + nkmp->n.shift - 1, + nkmp->n.shift); + if (nkmp->k.width) + k_mask =
GENMASK(nkmp->k.width + nkmp->k.shift - 1, + nkmp->k.shift); + if (nkmp->m.width) + m_mask =
GENMASK(nkmp->m.width + nkmp->m.shift - 1, + nkmp->m.shift); + if (nkmp->p.width) + p_mask =
GENMASK(nkmp->p.width + nkmp->p.shift - 1, + nkmp->p.shift);
spin_lock_irqsave(nkmp->common.lock, flags); -- GitLab From
583feb08e7f7ac9d533b446882eb3a54737a6dbb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date:
Wed, 6 Mar 2019 11:50:48 -0800 Subject: [PATCH 0520/1861] perf/x86/intel: Fix handling of
wakeup_events for multi-entry PEBS When an event is programmed with attr.wakeup_events=N (N>0),
it means the caller is interested in getting a user level notification after N samples have
been recorded in the kernel sampling buffer. With precise events on Intel processors, the
kernel uses PEBS. The kernel tries to minimize sampling overhead by verifying if the event
configuration is compatible with multi-entry PEBS mode. If so, the kernel is notified only when
the buffer has reached its threshold. Otherwise, PEBS operates in single-entry mode and the
kernel is notified for each PEBS sample. The problem is that the current implementation looks
at the frequency mode and the event sample_type but ignores the wakeup_events field. Thus, it
may not be possible to receive a notification after each precise event. This patch fixes this
problem by disabling multi-entry PEBS if wakeup_events is non-zero.
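From user space, the affected configuration looks like this (an editor's sketch; the event,
period and sample type are arbitrary choices):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /*
     * Open a precise (PEBS) sampling event that asks to be woken up after
     * every sample. Before this fix, the kernel could still choose
     * multi-entry PEBS and only wake the reader at the buffer threshold.
     */
    static int open_precise_event(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            attr.sample_type = PERF_SAMPLE_IP;
            attr.precise_ip = 2;            /* request precise/PEBS sampling */
            attr.wakeup_events = 1;         /* notify after each sample */
            attr.disabled = 1;

            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }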
Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen
Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter
Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link:
https://lkml.kernel.org/r/20190306195048.189514-1-eranian@google.com Signed-off-by: Ingo Molnar
--- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index
8baa441d8000f..1539647ea39d1 100644 --- a/arch/x86/events/intel/core.c +++
b/arch/x86/events/intel/core.c @@ -3185,7 +3185,7 @@ static int intel_pmu_hw_config(struct
perf_event *event) return ret; if (event->attr.precise_ip) { - if (!event->attr.freq) { + if
(!(event->attr.freq || event->attr.wakeup_events)) { event->hw.flags |=
PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event))) -- GitLab From f5ae2f932e2f8f4f79796f44832ae8fca26f188a
Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Mar 2019 13:32:30 +0100 Subject:
[PATCH 0521/1861] iwlwifi: mvm: avoid possible deadlock in TX path iwl_mvm_tx_mpdu() may run
from iwl_mvm_add_new_dqa_stream_wk(), where soft-IRQs aren't disabled. In this case, it may
hold the station lock and be interrupted by a soft-IRQ that also wants to acquire said lock,
leading to a deadlock. Fix it by disabling soft-IRQs in iwl_mvm_add_new_dqa_stream_wk().
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho ---
drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git
a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index db26f0041a81f..98d123dd71778 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++
b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1399,7 +1399,9 @@ void
iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) iwl_mvm_sta_alloc_queue(mvm, txq->sta,
txq->ac, tid); list_del_init(&mvmtxq->list); + local_bh_disable();
iwl_mvm_mac_itxq_xmit(mvm->hw, txq); + local_bh_enable(); } mutex_unlock(&mvm->mutex); -- GitLab
From dcfe3b103dd1348706bbdcfb9921c65452a6144e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date:
Tue, 12 Mar 2019 13:22:43 +0100 Subject: [PATCH 0522/1861] iwlwifi: mvm: update offloaded rate
control on changes With offloaded rate control, if the station parameters (rates, NSS,
bandwidth) change (sta_rc_update method), call iwl_mvm_rs_rate_init() to propagate those
changes to the firmware.
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index eeaeb84756665..6a3b11dd2edf5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3258,6 +3258,13 @@ static void iwl_mvm_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u32 changed) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + + if (changed & (IEEE80211_RC_BW_CHANGED | + IEEE80211_RC_SUPP_RATES_CHANGED | + IEEE80211_RC_NSS_CHANGED)) + iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band, + true); if (vif->type == NL80211_IFTYPE_STATION && changed & IEEE80211_RC_NSS_CHANGED) -- GitLab From debec2f23910cb17f2c0f6d5e30a8da00bb5f515 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 14 Mar 2019 14:57:08 +0200 Subject: [PATCH 0523/1861] iwlwifi: add support for quz firmwares Add a new configuration with a new firmware name for quz devices. And, since these devices have the same PCI device and subsystem IDs, we need to add some code to switch from a normal qu firmware to the quz firmware. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 18 ++++++++++++++++-- .../net/wireless/intel/iwlwifi/iwl-config.h | 1 + drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + .../net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index d5c29298ae3e8..eb6defb6d0cd9 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -82,6 +82,7 @@ #define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-" #define IWL_22000_SU_Z0_FW_PRE "iwlwifi-su-z0-" #define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-" +#define IWL_QUZ_A_HR_B_FW_PRE "iwlwifi-QuZ-a0-hr-b0-" #define IWL_QNJ_B_JF_B_FW_PRE "iwlwifi-QuQnj-b0-jf-b0-" #define IWL_CC_A_FW_PRE "iwlwifi-cc-a0-" #define IWL_22000_SO_A_JF_B_FW_PRE "iwlwifi-so-a0-jf-b0-" @@ -105,8 +106,8 @@ IWL_22000_HR_A0_FW_PRE __stringify(api) ".ucode" #define IWL_22000_SU_Z0_MODULE_FIRMWARE(api) \ IWL_22000_SU_Z0_FW_PRE __stringify(api) ".ucode" -#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ - IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" +#define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \ + IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode" #define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \ IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode" #define IWL_QNJ_B_JF_B_MODULE_FIRMWARE(api) \ @@ -235,6 +236,18 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; +const struct iwl_cfg iwl_ax101_cfg_quz_hr = { + .name = "Intel(R) Wi-Fi 6 AX101", + .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. 
+ */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + const struct iwl_cfg iwl_ax200_cfg_cc = { .name = "Intel(R) Wi-Fi 6 AX200 160MHz", .fw_name_pre = IWL_CC_A_FW_PRE, @@ -444,6 +457,7 @@ MODULE_FIRMWARE(IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SU_Z0_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QNJ_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index c91b537fa7ffe..93070848280a4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -549,6 +549,7 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr; extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb; extern const struct iwl_cfg iwl22000_2ac_cfg_jf; extern const struct iwl_cfg iwl_ax101_cfg_qu_hr; +extern const struct iwl_cfg iwl_ax101_cfg_quz_hr; extern const struct iwl_cfg iwl22000_2ax_cfg_hr; extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index aea6d03e545a1..e539bc94eff7f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -327,6 +327,7 @@ enum { #define CSR_HW_REV_TYPE_NONE (0x00001F0) #define CSR_HW_REV_TYPE_QNJ (0x0000360) #define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364) +#define CSR_HW_REV_TYPE_QUZ (0x0000354) #define CSR_HW_REV_TYPE_HR_CDB (0x0000340) #define CSR_HW_REV_TYPE_SO (0x0000370) #define CSR_HW_REV_TYPE_TY (0x0000420) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 1d6f3053f2337..79c1dc05f9488 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3543,6 +3543,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (cfg == &iwl_ax101_cfg_qu_hr) { if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == + CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && + trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) { + trans->cfg = &iwl22000_2ax_cfg_qnj_hr_b0; + } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) { trans->cfg = &iwl_ax101_cfg_qu_hr; } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == -- GitLab From dea2434c23c102b3e7d320849ec1cfeb432edb60 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 10:43:14 +0200 Subject: [PATCH 0524/1861] asm-generic/tlb: Provide a comment Write a comment explaining some of this.. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 119 +++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 6be86c1c5c583..f1594ba8b2de6 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -22,6 +22,118 @@ #ifdef CONFIG_MMU +/* + * Generic MMU-gather implementation. + * + * The mmu_gather data structure is used by the mm code to implement the + * correct and efficient ordering of freeing pages and TLB invalidations. + * + * This correct ordering is: + * + * 1) unhook page + * 2) TLB invalidate page + * 3) free page + * + * That is, we must never free a page before we have ensured there are no live + * translations left to it. Otherwise it might be possible to observe (or + * worse, change) the page content after it has been reused. + * + * The mmu_gather API consists of: + * + * - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather + * + * Finish in particular will issue a (final) TLB invalidate and free + * all (remaining) queued pages. + * + * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA + * + * Defaults to flushing at tlb_end_vma() to reset the range; helps when + * there's large holes between the VMAs. + * + * - tlb_remove_page() / __tlb_remove_page() + * - tlb_remove_page_size() / __tlb_remove_page_size() + * + * __tlb_remove_page_size() is the basic primitive that queues a page for + * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a + * boolean indicating if the queue is (now) full and a call to + * tlb_flush_mmu() is required. + * + * tlb_remove_page() and tlb_remove_page_size() imply the call to + * tlb_flush_mmu() when required and has no return value. + * + * - tlb_remove_check_page_size_change() + * + * call before __tlb_remove_page*() to set the current page-size; implies a + * possible tlb_flush_mmu() call. + * + * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() / tlb_flush_mmu_free() + * + * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets + * related state, like the range) + * + * tlb_flush_mmu_free() - frees the queued pages; make absolutely + * sure no additional tlb_remove_page() + * calls happen between _tlbonly() and this. + * + * tlb_flush_mmu() - the above two calls. + * + * - mmu_gather::fullmm + * + * A flag set by tlb_gather_mmu() to indicate we're going to free + * the entire mm; this allows a number of optimizations. + * + * - We can ignore tlb_{start,end}_vma(); because we don't + * care about ranges. Everything will be shot down. + * + * - (RISC) architectures that use ASIDs can cycle to a new ASID + * and delay the invalidation until ASID space runs out. + * + * - mmu_gather::need_flush_all + * + * A flag that can be set by the arch code if it wants to force + * flush the entire TLB irrespective of the range. For instance + * x86-PAE needs this when changing top-level entries. + * + * And requires the architecture to provide and implement tlb_flush(). + * + * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make + * use of: + * + * - mmu_gather::start / mmu_gather::end + * + * which provides the range that needs to be flushed to cover the pages to + * be freed. 
+ * + * - mmu_gather::freed_tables + * + * set when we freed page table pages + * + * - tlb_get_unmap_shift() / tlb_get_unmap_size() + * + * returns the smallest TLB entry size unmapped in this range + * + * Additionally there are a few opt-in features: + * + * HAVE_RCU_TABLE_FREE + * + * This provides tlb_remove_table(), to be used instead of tlb_remove_page() + * for page directores (__p*_free_tlb()). This provides separate freeing of + * the page-table pages themselves in a semi-RCU fashion (see comment below). + * Useful if your architecture doesn't use IPIs for remote TLB invalidates + * and therefore doesn't naturally serialize with software page-table walkers. + * + * When used, an architecture is expected to provide __tlb_remove_table() + * which does the actual freeing of these pages. + * + * HAVE_RCU_TABLE_INVALIDATE + * + * This makes HAVE_RCU_TABLE_FREE call tlb_flush_mmu_tlbonly() before freeing + * the page-table pages. Required if you use HAVE_RCU_TABLE_FREE and your + * architecture uses the Linux page-tables natively. + * + */ +#define HAVE_GENERIC_MMU_GATHER + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. @@ -89,14 +201,17 @@ struct mmu_gather_batch { */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) -/* struct mmu_gather is an opaque type used by the mm code for passing around +/* + * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; + #ifdef CONFIG_HAVE_RCU_TABLE_FREE struct mmu_table_batch *batch; #endif + unsigned long start; unsigned long end; /* @@ -131,8 +246,6 @@ struct mmu_gather { int page_size; }; -#define HAVE_GENERIC_MMU_GATHER - void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -- GitLab From ed6a79352cad00e9a49d6e438be40e45107207bf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 31 Aug 2018 14:46:08 +0200 Subject: [PATCH 0525/1861] asm-generic/tlb, arch: Provide CONFIG_HAVE_MMU_GATHER_PAGE_SIZE Move the mmu_gather::page_size things into the generic code instead of PowerPC specific bits. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/Kconfig | 3 +++ arch/arm/include/asm/tlb.h | 3 +-- arch/ia64/include/asm/tlb.h | 3 +-- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/tlb.h | 17 ----------------- arch/s390/include/asm/tlb.h | 4 +--- arch/sh/include/asm/tlb.h | 4 +--- arch/um/include/asm/tlb.h | 4 +--- include/asm-generic/tlb.h | 32 +++++++++++++++++++------------- mm/huge_memory.c | 4 ++-- mm/hugetlb.c | 2 +- mm/madvise.c | 2 +- mm/memory.c | 4 ++-- mm/mmu_gather.c | 5 +++++ 14 files changed, 39 insertions(+), 49 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 33687dddd86a7..cdc7f3d5d2783 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -386,6 +386,9 @@ config HAVE_RCU_TABLE_FREE config HAVE_RCU_TABLE_INVALIDATE bool +config HAVE_MMU_GATHER_PAGE_SIZE + bool + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index f854148c8d7c2..d644c3c7c6f38 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -286,8 +286,7 @@ tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr #define tlb_migrate_finish(mm) do { } while (0) -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { } diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 516355a774bfe..bf8985f5f876d 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -282,8 +282,7 @@ do { \ #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ tlb_remove_tlb_entry(tlb, ptep, address) -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d0be82c30619..a7aa4feabc09f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -218,6 +218,7 @@ config PPC select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if SMP + select HAVE_MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index e24c67d5ba75a..b018e9f9b491a 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -27,7 +27,6 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change extern void tlb_flush(struct mmu_gather *tlb); @@ -46,22 +45,6 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, #endif } -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, - unsigned int page_size) -{ - if (!tlb->page_size) - tlb->page_size = page_size; - else if (tlb->page_size != page_size) { - if (!tlb->fullmm) - tlb_flush_mmu(tlb); - /* - * update the page size after flush for the new - * mmu_gather. 
- */ - tlb->page_size = page_size; - } -} - #ifdef CONFIG_SMP static inline int mm_is_core_local(struct mm_struct *mm) { diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b31c779cf5817..9941a1442a884 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -180,9 +180,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ tlb_remove_tlb_entry(tlb, ptep, address) -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, - unsigned int page_size) +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 77abe192fb43d..af7c9d891cf80 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -127,9 +127,7 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, - unsigned int page_size) +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { } diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index dce6db147f245..6463f3ab1767f 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -146,9 +146,7 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ tlb_remove_tlb_entry(tlb, ptep, address) -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, - unsigned int page_size) +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f1594ba8b2de6..e75620e41ba4c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -61,7 +61,7 @@ * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * - * - tlb_remove_check_page_size_change() + * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. @@ -114,6 +114,11 @@ * * Additionally there are a few opt-in features: * + * HAVE_MMU_GATHER_PAGE_SIZE + * + * This ensures we call tlb_flush() every time tlb_change_page_size() actually + * changes the size and provides mmu_gather::page_size to tlb_flush(). 
+ * * HAVE_RCU_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() @@ -239,11 +244,15 @@ struct mmu_gather { unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; + unsigned int batch_count; + struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; - unsigned int batch_count; - int page_size; + +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + unsigned int page_size; +#endif }; void arch_tlb_gather_mmu(struct mmu_gather *tlb, @@ -309,21 +318,18 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } -#ifndef tlb_remove_check_page_size_change -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { - /* - * We don't care about page size change, just update - * mmu_gather page size here so that debug checks - * doesn't throw false warning. - */ -#ifdef CONFIG_DEBUG_VM +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + if (tlb->page_size && tlb->page_size != page_size) { + if (!tlb->fullmm) + tlb_flush_mmu(tlb); + } + tlb->page_size = page_size; #endif } -#endif static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 404acdcd0455d..76b75112a259d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1641,7 +1641,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, struct mm_struct *mm = tlb->mm; bool ret = false; - tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE); + tlb_change_page_size(tlb, HPAGE_PMD_SIZE); ptl = pmd_trans_huge_lock(pmd, vma); if (!ptl) @@ -1717,7 +1717,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t orig_pmd; spinlock_t *ptl; - tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE); + tlb_change_page_size(tlb, HPAGE_PMD_SIZE); ptl = __pmd_trans_huge_lock(pmd, vma); if (!ptl) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 97b1e0290c66d..3fc37a626b521 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3353,7 +3353,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, * This is a hugetlb vma, all the pte entries should point * to huge page. 
*/ - tlb_remove_check_page_size_change(tlb, sz); + tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma); /* diff --git a/mm/madvise.c b/mm/madvise.c index
21a7881a2db41..bb3a4554d5d56 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -328,7 +328,7 @@
static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd))
return 0; - tlb_remove_check_page_size_change(tlb, PAGE_SIZE); + tlb_change_page_size(tlb,
PAGE_SIZE); orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); diff --git a/mm/memory.c
b/mm/memory.c index ab650c21bccd5..1aa5c03566f11 100644 --- a/mm/memory.c +++ b/mm/memory.c @@
-356,7 +356,7 @@ void free_pgd_range(struct mmu_gather *tlb, * We add page table cache pages
with PAGE_SIZE, * (see pte_free_tlb()), flush the tlb if we need */ -
tlb_remove_check_page_size_change(tlb, PAGE_SIZE); + tlb_change_page_size(tlb, PAGE_SIZE); pgd
= pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); @@ -1046,7 +1046,7 @@ static
unsigned long zap_pte_range(struct mmu_gather *tlb, pte_t *pte; swp_entry_t entry; -
tlb_remove_check_page_size_change(tlb, PAGE_SIZE); + tlb_change_page_size(tlb, PAGE_SIZE);
again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); diff --git
a/mm/mmu_gather.c b/mm/mmu_gather.c index f2f03c6558070..14dfc97155e40 100644 ---
a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -58,7 +58,9 @@ void arch_tlb_gather_mmu(struct
mmu_gather *tlb, struct mm_struct *mm, #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb->batch = NULL;
#endif +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE tlb->page_size = 0; +#endif
__tlb_reset_range(tlb); } @@ -121,7 +123,10 @@ bool __tlb_remove_page_size(struct mmu_gather
*tlb, struct page *page, int page_ struct mmu_gather_batch *batch; VM_BUG_ON(!tlb->end); +
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE VM_WARN_ON(tlb->page_size != page_size); +#endif batch
= tlb->active; /* -- GitLab From e7fd28a706bfaf9cd65dccf18140187f7ad04839 Mon Sep 17 00:00:00
2001 From: Peter Zijlstra Date: Mon, 27 Aug 2018 13:00:17 +0200 Subject: [PATCH 0526/1861]
asm-generic/tlb, arch: Provide generic VIPT cache flush The one obvious thing SH and ARM want
is a sensible default for tlb_start_vma(). (also: https://lkml.org/lkml/2004/1/15/6 ) Avoid all
VIPT architectures providing their own tlb_start_vma() implementation and rely on architectures
to provide a no-op flush_cache_range() when it is not relevant. This patch makes
tlb_start_vma() default to flush_cache_range(), which should be right and sufficient. The only
exceptions that I found were (oddly): - m68k-mmu - sparc64 - unicore Those architectures appear
to have flush_cache_range(), but their current tlb_start_vma() does not call it. No change in
behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew
Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: David
Miller Cc: Guan Xuetao Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/arc/include/asm/tlb.h | 9 --------- arch/mips/include/asm/tlb.h | 9 --------- arch/nds32/include/asm/tlb.h | 6 ------ arch/nios2/include/asm/tlb.h | 10 ---------- arch/parisc/include/asm/tlb.h | 5 ----- arch/sparc/include/asm/tlb_32.h | 5 ----- arch/xtensa/include/asm/tlb.h | 9 --------- include/asm-generic/tlb.h | 19 +++++++++++-------- 8 files changed, 11 insertions(+), 61 deletions(-) diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h index a9db5f62aaf37..7af2b373ebe75 100644 --- a/arch/arc/include/asm/tlb.h +++ b/arch/arc/include/asm/tlb.h @@ -23,15 +23,6 @@ do { \ * * Note, read http://lkml.org/lkml/2004/1/15/6 */ -#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING -#define tlb_start_vma(tlb, vma) -#else -#define tlb_start_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ -} while(0) -#endif #define tlb_end_vma(tlb, vma) \ do { \ diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h index b6823b9e94dad..32b8a8187733d 100644 --- a/arch/mips/include/asm/tlb.h +++ b/arch/mips/include/asm/tlb.h @@ -5,15 +5,6 @@ #include #include -/* - * MIPS doesn't need any special per-pte or per-vma handling, except - * we need to flush cache for area to be unmapped. - */ -#define tlb_start_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h index b35ae5eae3ab3..0bf7c94823819 100644 --- a/arch/nds32/include/asm/tlb.h +++ b/arch/nds32/include/asm/tlb.h @@ -4,12 +4,6 @@ #ifndef __ASMNDS32_TLB_H #define __ASMNDS32_TLB_H -#define tlb_start_vma(tlb,vma) \ - do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) - #define tlb_end_vma(tlb,vma) \ do { \ if(!tlb->fullmm) \ diff --git a/arch/nios2/include/asm/tlb.h b/arch/nios2/include/asm/tlb.h index d3bc648e08b5d..9b518c6d0f622 100644 --- a/arch/nios2/include/asm/tlb.h +++ b/arch/nios2/include/asm/tlb.h @@ -15,16 +15,6 @@ extern void set_mmu_pid(unsigned long pid); -/* - * NiosII doesn't need any special per-pte or per-vma handling, except - * we need to flush cache for the area to be unmapped. 
- */ -#define tlb_start_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) - #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h index 0c881e74d8a62..b1984f9cd3af4 100644 --- a/arch/parisc/include/asm/tlb.h +++ b/arch/parisc/include/asm/tlb.h @@ -7,11 +7,6 @@ do { if ((tlb)->fullmm) \ flush_tlb_mm((tlb)->mm);\ } while (0) -#define tlb_start_vma(tlb, vma) \ -do { if (!(tlb)->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - #define tlb_end_vma(tlb, vma) \ do { if (!(tlb)->fullmm) \ flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h index 343cea19e5735..68d817273de87 100644 --- a/arch/sparc/include/asm/tlb_32.h +++ b/arch/sparc/include/asm/tlb_32.h @@ -2,11 +2,6 @@ #ifndef _SPARC_TLB_H #define _SPARC_TLB_H -#define tlb_start_vma(tlb, vma) \ -do { \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - #define tlb_end_vma(tlb, vma) \ do { \ flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h index 0d766f9c1083a..1a93e350382e7 100644 --- a/arch/xtensa/include/asm/tlb.h +++ b/arch/xtensa/include/asm/tlb.h @@ -16,19 +16,10 @@ #if (DCACHE_WAY_SIZE <= PAGE_SIZE) -/* Note, read http://lkml.org/lkml/2004/1/15/6 */ - -# define tlb_start_vma(tlb,vma) do { } while (0) # define tlb_end_vma(tlb,vma) do { } while (0) #else -# define tlb_start_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ - } while(0) - # define tlb_end_vma(tlb, vma) \ do { \ if (!tlb->fullmm) \ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e75620e41ba4c..f0aa53db5e60b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_MMU @@ -356,17 +357,19 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * the vmas are adjusted to only cover the region to be torn down. */ #ifndef tlb_start_vma -#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_start_vma(tlb, vma) \ +do { \ + if (!tlb->fullmm) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end); \ +} while (0) #endif -#define __tlb_end_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - tlb_flush_mmu_tlbonly(tlb); \ - } while (0) - #ifndef tlb_end_vma -#define tlb_end_vma __tlb_end_vma +#define tlb_end_vma(tlb, vma) \ +do { \ + if (!tlb->fullmm) \ + tlb_flush_mmu_tlbonly(tlb); \ +} while (0) #endif #ifndef __tlb_remove_tlb_entry -- GitLab From 5f307be18b32aeff7bbad540c0d3897ecedbeb56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 13:18:15 +0200 Subject: [PATCH 0527/1861] asm-generic/tlb, arch: Provide generic tlb_flush() based on flush_tlb_range() Provide a generic tlb_flush() implementation that relies on flush_tlb_range(). This is a little awkward because flush_tlb_range() assumes a VMA for range invalidation, but we no longer have one. Audit of all flush_tlb_range() implementations shows only vma->vm_mm and vma->vm_flags are used, and of the latter only VM_EXEC (I-TLB invalidates) and VM_HUGETLB (large TLB invalidate) are used. Therefore, track VM_EXEC and VM_HUGETLB in two more bits, and create a 'fake' VMA. 
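The pattern that the audit relies on can be sketched like this (an editor's illustration of a
typical implementation, not any particular architecture; the arch_flush_*() helpers are
hypothetical):

    #include <linux/mm.h>

    static void example_flush_tlb_range(struct vm_area_struct *vma,
                                        unsigned long start, unsigned long end)
    {
            struct mm_struct *mm = vma->vm_mm;      /* one of only two fields read */

            if (vma->vm_flags & VM_HUGETLB)         /* large-page invalidate */
                    arch_flush_huge_range(mm, start, end);  /* hypothetical */
            else
                    arch_flush_range(mm, start, end);       /* hypothetical */

            if (vma->vm_flags & VM_EXEC)            /* some also shoot down the I-TLB */
                    arch_flush_itlb_range(mm, start, end);  /* hypothetical */
    }

Since only vma->vm_mm and those two vm_flags bits are ever consulted, an on-stack struct
vm_area_struct carrying just them is sufficient.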
This allows architectures that have a reasonably efficient flush_tlb_range() to not require any additional effort. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/tlb.h | 1 + arch/powerpc/include/asm/tlb.h | 1 + arch/riscv/include/asm/tlb.h | 1 + arch/x86/include/asm/tlb.h | 1 + include/asm-generic/tlb.h | 95 +++++++++++++++++++++++++++++----- 5 files changed, 87 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 106fdc951b6ee..37603b5616a58 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -27,6 +27,7 @@ static inline void __tlb_remove_table(void *_table) free_page_and_swap_cache((struct page *)_table); } +#define tlb_flush tlb_flush static void tlb_flush(struct mmu_gather *tlb); #include diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b018e9f9b491a..34fba1ce27f7c 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -28,6 +28,7 @@ #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry +#define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); /* Get the generic bits... */ diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 439dc7072e05b..1ad8d093c58b8 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -18,6 +18,7 @@ struct mmu_gather; static void tlb_flush(struct mmu_gather *tlb); +#define tlb_flush tlb_flush #include static inline void tlb_flush(struct mmu_gather *tlb) diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 404b8b1d44f58..f23e7aaff4cd0 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -6,6 +6,7 @@ #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +#define tlb_flush tlb_flush static inline void tlb_flush(struct mmu_gather *tlb); #include diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f0aa53db5e60b..e6a4c407be6cc 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -95,7 +95,7 @@ * flush the entire TLB irrespective of the range. For instance * x86-PAE needs this when changing top-level entries. * - * And requires the architecture to provide and implement tlb_flush(). + * And allows the architecture to provide and implement tlb_flush(): * * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make * use of: @@ -111,7 +111,10 @@ * * - tlb_get_unmap_shift() / tlb_get_unmap_size() * - * returns the smallest TLB entry size unmapped in this range + * returns the smallest TLB entry size unmapped in this range. + * + * If an architecture does not provide tlb_flush() a default implementation + * based on flush_tlb_range() will be used. 
* * Additionally there are a few opt-in features: * @@ -245,6 +248,12 @@ struct mmu_gather { unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; + /* + * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma + */ + unsigned int vma_exec : 1; + unsigned int vma_huge : 1; + unsigned int batch_count; struct mmu_gather_batch *active; @@ -286,8 +295,59 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; + /* + * Do not reset mmu_gather::vma_* fields here, we do not + * call into tlb_start_vma() again to set them if there is an + * intermediate flush. + */ +} + +#ifndef tlb_flush + +#if defined(tlb_start_vma) || defined(tlb_end_vma) +#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() +#endif + +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->fullmm || tlb->need_flush_all) { + flush_tlb_mm(tlb->mm); + } else if (tlb->end) { + struct vm_area_struct vma = { + .vm_mm = tlb->mm, + .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | + (tlb->vma_huge ? VM_HUGETLB : 0), + }; + + flush_tlb_range(&vma, tlb->start, tlb->end); + } +} + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + /* + * flush_tlb_range() implementations that look at VM_HUGETLB (tile, + * mips-4k) flush only large pages. + * + * flush_tlb_range() implementations that flush I-TLB also flush D-TLB + * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing + * range. + * + * We rely on tlb_end_vma() to issue a flush, such that when we reset + * these values the batch is empty. + */ + tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB); + tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); } +#else + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#endif + static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { if (!tlb->end) @@ -357,19 +417,30 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * the vmas are adjusted to only cover the region to be torn down. */ #ifndef tlb_start_vma -#define tlb_start_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) +static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm) + return; + + tlb_update_vma_flags(tlb, vma); + flush_cache_range(vma, vma->vm_start, vma->vm_end); +} #endif #ifndef tlb_end_vma -#define tlb_end_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - tlb_flush_mmu_tlbonly(tlb); \ -} while (0) +static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm) + return; + + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs, + * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on + * this. + */ + tlb_flush_mmu_tlbonly(tlb); +} #endif #ifndef __tlb_remove_tlb_entry -- GitLab From a30e32bd79e924f460b8b83408d88af34a402f6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 16:51:51 +0200 Subject: [PATCH 0528/1861] asm-generic/tlb: Provide generic tlb_flush() based on flush_tlb_mm() When an architecture does not have (an efficient) flush_tlb_range(), but instead always uses full TLB invalidates, the current generic tlb_flush() is sub-optimal, for it will generate extra flushes in order to keep the range small. But if we cannot do range flushes, that is a moot concern. 
Optionally provide this simplified default. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 41 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e6a4c407be6cc..6850d8bab6d36 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -114,7 +114,8 @@ * returns the smallest TLB entry size unmapped in this range. * * If an architecture does not provide tlb_flush() a default implementation - * based on flush_tlb_range() will be used. + * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is + * specified, in which case we'll default to flush_tlb_mm(). * * Additionally there are a few opt-in features: * @@ -140,6 +141,9 @@ * the page-table pages. Required if you use HAVE_RCU_TABLE_FREE and your * architecture uses the Linux page-tables natively. * + * MMU_GATHER_NO_RANGE + * + * Use this if your architecture lacks an efficient flush_tlb_range(). */ #define HAVE_GENERIC_MMU_GATHER @@ -302,12 +306,45 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) */ } +#ifdef CONFIG_MMU_GATHER_NO_RANGE + +#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) +#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() +#endif + +/* + * When an architecture does not have efficient means of range flushing TLBs + * there is no point in doing intermediate flushes on tlb_end_vma() to keep the + * range small. We equally don't have to worry about page granularity or other + * things. + * + * All we need to do is issue a full flush for any !0 range. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->end) + flush_tlb_mm(tlb->mm); +} + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#define tlb_end_vma tlb_end_vma +static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#else /* CONFIG_MMU_GATHER_NO_RANGE */ + #ifndef tlb_flush #if defined(tlb_start_vma) || defined(tlb_end_vma) #error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() #endif +/* + * When an architecture does not provide its own tlb_flush() implementation + * but does have a reasonably efficient flush_tlb_range() implementation + * use that. + */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm || tlb->need_flush_all) { @@ -348,6 +385,8 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #endif +#endif /* CONFIG_MMU_GATHER_NO_RANGE */ + static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { if (!tlb->end) -- GitLab From 8b6dd0c47894e2c190560914318aa4181bc8c2f2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 16:00:53 +0200 Subject: [PATCH 0529/1861] asm-generic/tlb, ia64: Conditionally provide tlb_migrate_finish() Needed for ia64 -- alternatively we drop the entire hook. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 6850d8bab6d36..1c861989b704c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -604,6 +604,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #endif /* CONFIG_MMU */ +#ifndef tlb_migrate_finish #define tlb_migrate_finish(mm) do {} while (0) +#endif #endif /* _ASM_GENERIC__TLB_H */ -- GitLab From 96bc9567cbe112e9320250f01b9c060c882e8619 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Sep 2018 13:24:41 +0200 Subject: [PATCH 0530/1861] asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE Make issuing a TLB invalidate for page-table pages the normal case. The reason is twofold: - too many invalidates is safer than too few, - most architectures use the linux page-tables natively and would thus require this. Make it an opt-out, instead of an opt-in. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/Kconfig | 2 +- arch/arm64/Kconfig | 1 - arch/powerpc/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/x86/Kconfig | 1 - include/asm-generic/tlb.h | 9 +++++---- mm/mmu_gather.c | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index cdc7f3d5d2783..04b3e8b94cfe2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -383,7 +383,7 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE config HAVE_RCU_TABLE_FREE bool -config HAVE_RCU_TABLE_INVALIDATE +config HAVE_RCU_TABLE_NO_INVALIDATE bool config HAVE_MMU_GATHER_PAGE_SIZE diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de1..78d9fafac9836 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -149,7 +149,6 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE - select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a7aa4feabc09f..8e1e2abf17eb1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -218,6 +218,7 @@ config PPC select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if SMP + select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 40f8f4f73fe8f..db79290ed6d5c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -63,6 +63,7 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP + select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19c..b0f30d86c23fa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -183,7 +183,6 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if PARAVIRT - select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE select 
HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_FUNCTION_ARG_ACCESS_API diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 1c861989b704c..81799e6a43044 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -135,11 +135,12 @@ * When used, an architecture is expected to provide __tlb_remove_table() * which does the actual freeing of these pages. * - * HAVE_RCU_TABLE_INVALIDATE + * HAVE_RCU_TABLE_NO_INVALIDATE * - * This makes HAVE_RCU_TABLE_FREE call tlb_flush_mmu_tlbonly() before freeing - * the page-table pages. Required if you use HAVE_RCU_TABLE_FREE and your - * architecture uses the Linux page-tables natively. + * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before + * freeing the page-table pages. This can be avoided if you use + * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux + * page-tables natively. * * MMU_GATHER_NO_RANGE * diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 14dfc97155e40..2a5322d52b0aa 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -157,7 +157,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ */ static inline void tlb_table_invalidate(struct mmu_gather *tlb) { -#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE +#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE /* * Invalidate page-table caches used by hardware walkers. Then we still * need to RCU-sched wait while freeing the pages because software -- GitLab From b78180b97dcf667350aac716cd3f32356eaf4984 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 14:09:30 +0200 Subject: [PATCH 0531/1861] arm/tlb: Convert to generic mmu_gather Generic mmu_gather provides everything that ARM needs: - range tracking - RCU table free - VM_EXEC tracking - VIPT cache flushing The one notable curiosity is the 'funny' range tracking for classical ARM in __pte_free_tlb(). No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Russell King Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/arm/include/asm/tlb.h | 254 ++----------------------------------- 1 file changed, 13 insertions(+), 241 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index d644c3c7c6f38..bc6d04a098998 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -33,270 +33,42 @@ #include #include -#define MMU_GATHER_BUNDLE 8 - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -struct mmu_table_batch { - struct rcu_head rcu; - unsigned int nr; - void *tables[0]; -}; - -#define MAX_TABLE_BATCH \ - ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) - -extern void tlb_table_flush(struct mmu_gather *tlb); -extern void tlb_remove_table(struct mmu_gather *tlb, void *table); - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ - -/* - * TLB handling. This allows us to remove pages from the page - * tables, and efficiently handle the TLB issues. 
- */ -struct mmu_gather { - struct mm_struct *mm; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - struct mmu_table_batch *batch; - unsigned int need_flush; -#endif - unsigned int fullmm; - struct vm_area_struct *vma; - unsigned long start, end; - unsigned long range_start; - unsigned long range_end; - unsigned int nr; - unsigned int max; - struct page **pages; - struct page *local[MMU_GATHER_BUNDLE]; -}; - -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); - -/* - * This is unnecessarily complex. There's three ways the TLB shootdown - * code is used: - * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region(). - * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called. - * tlb->vma will be non-NULL. - * 2. Unmapping all vmas. See exit_mmap(). - * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called. - * tlb->vma will be non-NULL. Additionally, page tables will be freed. - * 3. Unmapping argument pages. See shift_arg_pages(). - * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called. - * tlb->vma will be NULL. - */ -static inline void tlb_flush(struct mmu_gather *tlb) -{ - if (tlb->fullmm || !tlb->vma) - flush_tlb_mm(tlb->mm); - else if (tlb->range_end > 0) { - flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end); - tlb->range_start = TASK_SIZE; - tlb->range_end = 0; - } -} - -static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) -{ - if (!tlb->fullmm) { - if (addr < tlb->range_start) - tlb->range_start = addr; - if (addr + PAGE_SIZE > tlb->range_end) - tlb->range_end = addr + PAGE_SIZE; - } -} - -static inline void __tlb_alloc_page(struct mmu_gather *tlb) -{ - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - - if (addr) { - tlb->pages = (void *)addr; - tlb->max = PAGE_SIZE / sizeof(struct page *); - } -} - -static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) -{ - tlb_flush(tlb); -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif -} - -static inline void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - free_pages_and_swap_cache(tlb->pages, tlb->nr); - tlb->nr = 0; - if (tlb->pages == tlb->local) - __tlb_alloc_page(tlb); -} - -static inline void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - -static inline void -arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - tlb->fullmm = !(start | (end+1)); - tlb->start = start; - tlb->end = end; - tlb->vma = NULL; - tlb->max = ARRAY_SIZE(tlb->local); - tlb->pages = tlb->local; - tlb->nr = 0; - __tlb_alloc_page(tlb); +#include -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; +#ifndef CONFIG_HAVE_RCU_TABLE_FREE +#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry) #endif -} - -static inline void -arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - if (force) { - tlb->range_start = start; - tlb->range_end = end; - } - - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - if (tlb->pages != tlb->local) - free_pages((unsigned long)tlb->pages, 0); -} - -/* - * Memorize the range for the TLB flush. 
- */ -static inline void -tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) -{ - tlb_add_flush(tlb, addr); -} - -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - tlb_remove_tlb_entry(tlb, ptep, address) -/* - * In the case of tlb vma handling, we can optimise these away in the - * case where we're doing a full MM flush. When we're doing a munmap, - * the vmas are adjusted to only cover the region to be torn down. - */ -static inline void -tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) -{ - if (!tlb->fullmm) { - flush_cache_range(vma, vma->vm_start, vma->vm_end); - tlb->vma = vma; - tlb->range_start = TASK_SIZE; - tlb->range_end = 0; - } -} static inline void -tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) -{ - if (!tlb->fullmm) - tlb_flush(tlb); -} - -static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - tlb->pages[tlb->nr++] = page; - VM_WARN_ON(tlb->nr > tlb->max); - if (tlb->nr == tlb->max) - return true; - return false; -} - -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - if (__tlb_remove_page(tlb, page)) - tlb_flush_mmu(tlb); -} - -static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return __tlb_remove_page(tlb, page); -} - -static inline void tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return tlb_remove_page(tlb, page); -} - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, - unsigned long addr) +__pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { pgtable_page_dtor(pte); -#ifdef CONFIG_ARM_LPAE - tlb_add_flush(tlb, addr); -#else +#ifndef CONFIG_ARM_LPAE /* * With the classic ARM MMU, a pte page has two corresponding pmd * entries, each covering 1MB. */ - addr &= PMD_MASK; - tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE); - tlb_add_flush(tlb, addr + SZ_1M); + addr = (addr & PMD_MASK) + SZ_1M; + __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); #endif - tlb_remove_entry(tlb, pte); -} - -static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, - unsigned long addr) -{ -#ifdef CONFIG_ARM_LPAE - tlb_add_flush(tlb, addr); - tlb_remove_entry(tlb, virt_to_page(pmdp)); -#endif + tlb_remove_table(tlb, pte); } static inline void -tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) -{ - tlb_add_flush(tlb, addr); -} - -#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) -#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) -#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) - -#define tlb_migrate_finish(mm) do { } while (0) - -static inline void tlb_change_page_size(struct mmu_gather *tlb, - unsigned int page_size) +__pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { -} - -static inline void tlb_flush_remove_tables(struct mm_struct *mm) -{ -} +#ifdef CONFIG_ARM_LPAE + struct page *page = virt_to_page(pmdp); -static inline void tlb_flush_remove_tables_local(void *arg) -{ + tlb_remove_table(tlb, page); +#endif } #endif /* CONFIG_MMU */ -- GitLab From e154700774e83264386483aa50bbd5cec44c2d7a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 14:31:27 +0200 Subject: [PATCH 0532/1861] ia64/tlb: Convert to generic mmu_gather Generic mmu_gather provides everything ia64 needs (range tracking). No change in behavior intended. 
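Before moving on, one hunk in the ARM conversion above deserves a worked check: the classic (non-LPAE) __pte_free_tlb() path, where a single pte page is mapped by two 1 MiB hardware pmd entries. The standalone snippet below is an editor's sketch with the classic-ARM constants assumed (4 KiB pages, one Linux pmd spanning 2 MiB); it shows that the new one-shot __tlb_adjust_range() covers exactly the two pages the old per-page tlb_add_flush() calls did:

	#include <stdio.h>

	#define PAGE_SIZE	0x1000UL	/* assumed: 4 KiB pages */
	#define PMD_SIZE	0x200000UL	/* assumed: Linux pmd spans 2 MiB */
	#define PMD_MASK	(~(PMD_SIZE - 1))
	#define SZ_1M		0x100000UL

	int main(void)
	{
		/* Any address inside the pte page's 2 MiB span. */
		unsigned long addr = 0x00234000UL;

		/* New code: round down to the pmd, step to the 1 MiB boundary
		 * between the two hardware entries, then cover one page either
		 * side of it with __tlb_adjust_range(tlb, a - PAGE_SIZE,
		 * 2 * PAGE_SIZE). */
		unsigned long a = (addr & PMD_MASK) + SZ_1M;

		printf("flush [%#lx, %#lx)\n", a - PAGE_SIZE, a + PAGE_SIZE);

		/*
		 * Old code grew the range page-by-page around the same point:
		 *   tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE);
		 *   tlb_add_flush(tlb, addr + SZ_1M);
		 * with addr pre-masked by PMD_MASK, i.e. the identical
		 * [0x2ff000, 0x301000) interval for this example address.
		 */
		return 0;
	}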
Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/tlb.h | 256 +------------------------------ arch/ia64/include/asm/tlbflush.h | 25 +++ arch/ia64/mm/tlb.c | 23 ++- 3 files changed, 47 insertions(+), 257 deletions(-) diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index bf8985f5f876d..849fab9ccb333 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -47,262 +47,8 @@ #include #include -/* - * If we can't allocate a page to make a big batch of page pointers - * to work on, then just handle a few from the on-stack structure. - */ -#define IA64_GATHER_BUNDLE 8 - -struct mmu_gather { - struct mm_struct *mm; - unsigned int nr; - unsigned int max; - unsigned char fullmm; /* non-zero means full mm flush */ - unsigned char need_flush; /* really unmapped some PTEs? */ - unsigned long start, end; - unsigned long start_addr; - unsigned long end_addr; - struct page **pages; - struct page *local[IA64_GATHER_BUNDLE]; -}; - -struct ia64_tr_entry { - u64 ifa; - u64 itir; - u64 pte; - u64 rr; -}; /*Record for tr entry!*/ - -extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); -extern void ia64_ptr_entry(u64 target_mask, int slot); - -extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; - -/* - region register macros -*/ -#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001) -#define RR_VE(val) (((val) & 0x0000000000000001) << 0) -#define RR_VE_MASK 0x0000000000000001L -#define RR_VE_SHIFT 0 -#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f) -#define RR_PS(val) (((val) & 0x000000000000003f) << 2) -#define RR_PS_MASK 0x00000000000000fcL -#define RR_PS_SHIFT 2 -#define RR_RID_MASK 0x00000000ffffff00L -#define RR_TO_RID(val) ((val >> 8) & 0xffffff) - -static inline void -ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end) -{ - tlb->need_flush = 0; - - if (tlb->fullmm) { - /* - * Tearing down the entire address space. This happens both as a result - * of exit() and execve(). The latter case necessitates the call to - * flush_tlb_mm() here. - */ - flush_tlb_mm(tlb->mm); - } else if (unlikely (end - start >= 1024*1024*1024*1024UL - || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) - { - /* - * If we flush more than a tera-byte or across regions, we're probably - * better off just flushing the entire TLB(s). This should be very rare - * and is not worth optimizing for. - */ - flush_tlb_all(); - } else { - /* - * flush_tlb_range() takes a vma instead of a mm pointer because - * some architectures want the vm_flags for ITLB/DTLB flush. - */ - struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); - - /* flush the address range from the tlb: */ - flush_tlb_range(&vma, start, end); - /* now flush the virt. 
page-table area mapping the address range: */ - flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end)); - } - -} - -static inline void -ia64_tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - unsigned long i; - unsigned int nr; - - /* lastly, release the freed pages */ - nr = tlb->nr; - - tlb->nr = 0; - tlb->start_addr = ~0UL; - for (i = 0; i < nr; ++i) - free_page_and_swap_cache(tlb->pages[i]); -} - -/* - * Flush the TLB for address range START to END and, if not in fast mode, release the - * freed pages that where gathered up to this point. - */ -static inline void -ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) -{ - if (!tlb->need_flush) - return; - ia64_tlb_flush_mmu_tlbonly(tlb, start, end); - ia64_tlb_flush_mmu_free(tlb); -} - -static inline void __tlb_alloc_page(struct mmu_gather *tlb) -{ - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - - if (addr) { - tlb->pages = (void *)addr; - tlb->max = PAGE_SIZE / sizeof(void *); - } -} - - -static inline void -arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - tlb->max = ARRAY_SIZE(tlb->local); - tlb->pages = tlb->local; - tlb->nr = 0; - tlb->fullmm = !(start | (end+1)); - tlb->start = start; - tlb->end = end; - tlb->start_addr = ~0UL; -} - -/* - * Called at the end of the shootdown operation to free up any resources that were - * collected. - */ -static inline void -arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - if (force) - tlb->need_flush = 1; - /* - * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and - * tlb->end_addr. - */ - ia64_tlb_flush_mmu(tlb, start, end); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - if (tlb->pages != tlb->local) - free_pages((unsigned long)tlb->pages, 0); -} - -/* - * Logically, this routine frees PAGE. On MP machines, the actual freeing of the page - * must be delayed until after the TLB has been flushed (see comments at the beginning of - * this file). - */ -static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - tlb->need_flush = 1; - - if (!tlb->nr && tlb->pages == tlb->local) - __tlb_alloc_page(tlb); - - tlb->pages[tlb->nr++] = page; - VM_WARN_ON(tlb->nr > tlb->max); - if (tlb->nr == tlb->max) - return true; - return false; -} - -static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) -{ - ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr); -} - -static inline void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - ia64_tlb_flush_mmu_free(tlb); -} - -static inline void tlb_flush_mmu(struct mmu_gather *tlb) -{ - ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr); -} - -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - if (__tlb_remove_page(tlb, page)) - tlb_flush_mmu(tlb); -} - -static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return __tlb_remove_page(tlb, page); -} - -static inline void tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return tlb_remove_page(tlb, page); -} - -/* - * Remove TLB entry for PTE mapped at virtual address ADDRESS. This is called for any - * PTE, not just those pointing to (normal) physical memory. 
- */ -static inline void -__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address) -{ - if (tlb->start_addr == ~0UL) - tlb->start_addr = address; - tlb->end_addr = address + PAGE_SIZE; -} - #define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm) -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) - -#define tlb_remove_tlb_entry(tlb, ptep, addr) \ -do { \ - tlb->need_flush = 1; \ - __tlb_remove_tlb_entry(tlb, ptep, addr); \ -} while (0) - -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - tlb_remove_tlb_entry(tlb, ptep, address) - -static inline void tlb_change_page_size(struct mmu_gather *tlb, - unsigned int page_size) -{ -} - -#define pte_free_tlb(tlb, ptep, address) \ -do { \ - tlb->need_flush = 1; \ - __pte_free_tlb(tlb, ptep, address); \ -} while (0) - -#define pmd_free_tlb(tlb, ptep, address) \ -do { \ - tlb->need_flush = 1; \ - __pmd_free_tlb(tlb, ptep, address); \ -} while (0) - -#define pud_free_tlb(tlb, pudp, address) \ -do { \ - tlb->need_flush = 1; \ - __pud_free_tlb(tlb, pudp, address); \ -} while (0) +#include #endif /* _ASM_IA64_TLB_H */ diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h index 25e280810f6c4..ceac10c4d6e2f 100644 --- a/arch/ia64/include/asm/tlbflush.h +++ b/arch/ia64/include/asm/tlbflush.h @@ -14,6 +14,31 @@ #include #include +struct ia64_tr_entry { + u64 ifa; + u64 itir; + u64 pte; + u64 rr; +}; /*Record for tr entry!*/ + +extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); +extern void ia64_ptr_entry(u64 target_mask, int slot); +extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; + +/* + region register macros +*/ +#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001) +#define RR_VE(val) (((val) & 0x0000000000000001) << 0) +#define RR_VE_MASK 0x0000000000000001L +#define RR_VE_SHIFT 0 +#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f) +#define RR_PS(val) (((val) & 0x000000000000003f) << 2) +#define RR_PS_MASK 0x00000000000000fcL +#define RR_PS_SHIFT 2 +#define RR_RID_MASK 0x00000000ffffff00L +#define RR_TO_RID(val) ((val >> 8) & 0xffffff) + /* * Now for some TLB flushing routines. This is the kind of stuff that * can be very expensive, so try to avoid them whenever possible. diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 5fc89aabdce1f..5158bd28de055 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -305,8 +305,8 @@ local_flush_tlb_all (void) ia64_srlz_i(); /* srlz.i implies srlz.d */ } -void -flush_tlb_range (struct vm_area_struct *vma, unsigned long start, +static void +__flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -343,6 +343,25 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, preempt_enable(); ia64_srlz_i(); /* srlz.i implies srlz.d */ } + +void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (unlikely(end - start >= 1024*1024*1024*1024UL + || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) { + /* + * If we flush more than a tera-byte or across regions, we're + * probably better off just flushing the entire TLB(s). This + * should be very rare and is not worth optimizing for. + */ + flush_tlb_all(); + } else { + /* flush the address range from the tlb */ + __flush_tlb_range(vma, start, end); + /* flush the virt. 
page-table area mapping the addr range */ + __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end)); + } +} EXPORT_SYMBOL(flush_tlb_range); void ia64_tlb_init(void) -- GitLab From c5b27a889da92f4a969d61df77bd4f79ffce57c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 14:45:04 +0200 Subject: [PATCH 0533/1861] sh/tlb: Convert SH to generic mmu_gather Generic mmu_gather provides everything SH needs (range tracking and cache coherency). No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rich Felker Cc: Rik van Riel Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Ingo Molnar --- arch/sh/include/asm/pgalloc.h | 9 +++ arch/sh/include/asm/tlb.h | 130 +--------------------------------- 2 files changed, 10 insertions(+), 129 deletions(-) diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 8ad73cb311216..b56f908b13950 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -70,6 +70,15 @@ do { \ tlb_remove_page((tlb), (pte)); \ } while (0) +#if CONFIG_PGTABLE_LEVELS > 2 +#define __pmd_free_tlb(tlb, pmdp, addr) \ +do { \ + struct page *page = virt_to_page(pmdp); \ + pgtable_pmd_page_dtor(page); \ + tlb_remove_page((tlb), page); \ +} while (0); +#endif + static inline void check_pgt_cache(void) { quicklist_trim(QUICK_PT, NULL, 25, 16); diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index af7c9d891cf80..bc77f3dd4261d 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -11,131 +11,8 @@ #ifdef CONFIG_MMU #include -#include -#include -#include -/* - * TLB handling. This allows us to remove pages from the page - * tables, and efficiently handle the TLB issues. - */ -struct mmu_gather { - struct mm_struct *mm; - unsigned int fullmm; - unsigned long start, end; -}; - -static inline void init_tlb_gather(struct mmu_gather *tlb) -{ - tlb->start = TASK_SIZE; - tlb->end = 0; - - if (tlb->fullmm) { - tlb->start = 0; - tlb->end = TASK_SIZE; - } -} - -static inline void -arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - tlb->start = start; - tlb->end = end; - tlb->fullmm = !(start | (end+1)); - - init_tlb_gather(tlb); -} - -static inline void -arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - if (tlb->fullmm || force) - flush_tlb_mm(tlb->mm); - - /* keep the page table cache within bounds */ - check_pgt_cache(); -} - -static inline void -tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) -{ - if (tlb->start > address) - tlb->start = address; - if (tlb->end < address + PAGE_SIZE) - tlb->end = address + PAGE_SIZE; -} - -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - tlb_remove_tlb_entry(tlb, ptep, address) - -/* - * In the case of tlb vma handling, we can optimise these away in the - * case where we're doing a full MM flush. When we're doing a munmap, - * the vmas are adjusted to only cover the region to be torn down. 
- */ -static inline void -tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) -{ - if (!tlb->fullmm) - flush_cache_range(vma, vma->vm_start, vma->vm_end); -} - -static inline void -tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) -{ - if (!tlb->fullmm && tlb->end) { - flush_tlb_range(vma, tlb->start, tlb->end); - init_tlb_gather(tlb); - } -} - -static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) -{ -} - -static inline void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ -} - -static inline void tlb_flush_mmu(struct mmu_gather *tlb) -{ -} - -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - free_page_and_swap_cache(page); - return false; /* avoid calling tlb_flush_mmu */ -} - -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - __tlb_remove_page(tlb, page); -} - -static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return __tlb_remove_page(tlb, page); -} - -static inline void tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return tlb_remove_page(tlb, page); -} - -static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) -{ -} - -#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) -#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) -#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) - -#define tlb_migrate_finish(mm) do { } while (0) +#include #if defined(CONFIG_CPU_SH4) || defined(CONFIG_SUPERH64) extern void tlb_wire_entry(struct vm_area_struct *, unsigned long, pte_t); @@ -155,11 +32,6 @@ static inline void tlb_unwire_entry(void) #else /* CONFIG_MMU */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) -#define tlb_flush(tlb) do { } while (0) - #include #endif /* CONFIG_MMU */ -- GitLab From 7bb8709d6ad3ceeb5010a98b0d7eb11db8836da1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 17:54:03 +0200 Subject: [PATCH 0534/1861] um/tlb: Convert to generic mmu_gather Generic mmu_gather provides the simple flush_tlb_range() based range tracking mmu_gather UM needs. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Rik van Riel Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- arch/um/include/asm/tlb.h | 156 +------------------------------------- 1 file changed, 2 insertions(+), 154 deletions(-) diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 6463f3ab1767f..70ee603839006 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -2,160 +2,8 @@ #ifndef __UM_TLB_H #define __UM_TLB_H -#include -#include -#include -#include #include - -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - -/* struct mmu_gather is an opaque type used by the mm code for passing around - * any data needed by arch specific code for tlb_remove_page. - */ -struct mmu_gather { - struct mm_struct *mm; - unsigned int need_flush; /* Really unmapped some ptes? 
*/ - unsigned long start; - unsigned long end; - unsigned int fullmm; /* non-zero means full mm flush */ -}; - -static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, - unsigned long address) -{ - if (tlb->start > address) - tlb->start = address; - if (tlb->end < address + PAGE_SIZE) - tlb->end = address + PAGE_SIZE; -} - -static inline void init_tlb_gather(struct mmu_gather *tlb) -{ - tlb->need_flush = 0; - - tlb->start = TASK_SIZE; - tlb->end = 0; - - if (tlb->fullmm) { - tlb->start = 0; - tlb->end = TASK_SIZE; - } -} - -static inline void -arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - tlb->start = start; - tlb->end = end; - tlb->fullmm = !(start | (end+1)); - - init_tlb_gather(tlb); -} - -extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, - unsigned long end); - -static inline void -tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) -{ - flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end); -} - -static inline void -tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - init_tlb_gather(tlb); -} - -static inline void -tlb_flush_mmu(struct mmu_gather *tlb) -{ - if (!tlb->need_flush) - return; - - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - -/* arch_tlb_finish_mmu - * Called at the end of the shootdown operation to free up any resources - * that were required. - */ -static inline void -arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - if (force) { - tlb->start = start; - tlb->end = end; - tlb->need_flush = 1; - } - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); -} - -/* tlb_remove_page - * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), - * while handling the additional races in SMP caused by other CPUs - * caching valid mappings in their TLBs. - */ -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - tlb->need_flush = 1; - free_page_and_swap_cache(page); - return false; /* avoid calling tlb_flush_mmu */ -} - -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - __tlb_remove_page(tlb, page); -} - -static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return __tlb_remove_page(tlb, page); -} - -static inline void tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) -{ - return tlb_remove_page(tlb, page); -} - -/** - * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. - * - * Record the fact that pte's were really umapped in ->need_flush, so we can - * later optimise away the tlb invalidate. This helps when userspace is - * unmapping already-unmapped pages, which happens quite a lot. 
- */ -#define tlb_remove_tlb_entry(tlb, ptep, address) \ - do { \ - tlb->need_flush = 1; \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ - } while (0) - -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - tlb_remove_tlb_entry(tlb, ptep, address) - -static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) -{ -} - -#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) - -#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr) - -#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) - -#define tlb_migrate_finish(mm) do {} while (0) +#include +#include #endif -- GitLab From 6137fed0823247e32306bde2b48cac627c24f894 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 17:04:07 +0200 Subject: [PATCH 0535/1861] arch/tlb: Clean up simple architectures For the architectures that do not implement their own tlb_flush() but do already use the generic mmu_gather, there are two options: 1) the platform has an efficient flush_tlb_range() and asm-generic/tlb.h doesn't need any overrides at all. 2) the platform lacks an efficient flush_tlb_range() and we select MMU_GATHER_NO_RANGE to minimize full invalidates. Convert all 'simple' architectures to one of these two forms. alpha: has no range invalidate -> 2 arc: already used flush_tlb_range() -> 1 c6x: has no range invalidate -> 2 hexagon: has an efficient flush_tlb_range() -> 1 (flush_tlb_mm() is in fact a full range invalidate, so no need to shoot down everything) m68k: has inefficient flush_tlb_range() -> 2 microblaze: has no flush_tlb_range() -> 2 mips: has efficient flush_tlb_range() -> 1 (even though it currently seems to use flush_tlb_mm()) nds32: already uses flush_tlb_range() -> 1 nios2: has inefficient flush_tlb_range() -> 2 (no limit on range iteration) openrisc: has inefficient flush_tlb_range() -> 2 (no limit on range iteration) parisc: already uses flush_tlb_range() -> 1 sparc32: already uses flush_tlb_range() -> 1 unicore32: has inefficient flush_tlb_range() -> 2 (no limit on range iteration) xtensa: has efficient flush_tlb_range() -> 1 Note this also fixes a bug in the existing code for a number platforms. Those platforms that did: tlb_end_vma() -> if (!full_mm) flush_tlb_*() tlb_flush -> if (full_mm) flush_tlb_mm() missed the case of shift_arg_pages(), which doesn't have @fullmm set, nor calls into tlb_*vma(), but still frees page-tables and thus needs an invalidate. The new code handles this by detecting a non-empty range, and either issuing the matching range invalidate or a full invalidate, depending on the capabilities. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: David S. Miller Cc: Greentime Hu Cc: Guan Xuetao Cc: H. 
Peter Anvin Cc: Helge Deller Cc: Jonas Bonn Cc: Ley Foon Tan Cc: Linus Torvalds Cc: Mark Salter Cc: Max Filippov Cc: Michal Simek Cc: Nick Piggin Cc: Paul Burton Cc: Peter Zijlstra Cc: Richard Henderson Cc: Richard Kuo Cc: Rik van Riel Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Ingo Molnar --- arch/alpha/Kconfig | 1 + arch/alpha/include/asm/tlb.h | 6 ------ arch/arc/include/asm/tlb.h | 23 ----------------------- arch/c6x/Kconfig | 1 + arch/c6x/include/asm/tlb.h | 2 -- arch/h8300/include/asm/tlb.h | 2 -- arch/hexagon/include/asm/tlb.h | 12 ------------ arch/m68k/Kconfig | 1 + arch/m68k/include/asm/tlb.h | 14 -------------- arch/microblaze/Kconfig | 1 + arch/microblaze/include/asm/tlb.h | 9 --------- arch/mips/include/asm/tlb.h | 8 -------- arch/nds32/include/asm/tlb.h | 10 ---------- arch/nios2/Kconfig | 1 + arch/nios2/include/asm/tlb.h | 8 ++++---- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/tlb.h | 8 ++------ arch/parisc/include/asm/tlb.h | 13 ------------- arch/sparc/include/asm/tlb_32.h | 13 ------------- arch/unicore32/Kconfig | 1 + arch/unicore32/include/asm/tlb.h | 7 +++---- arch/xtensa/include/asm/tlb.h | 17 ----------------- 22 files changed, 16 insertions(+), 143 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 584a6e1148539..c7c976eb64074 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -36,6 +36,7 @@ config ALPHA select ODD_RT_SIGACTION select OLD_SIGSUSPEND select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 + select MMU_GATHER_NO_RANGE help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h index 8f5042b61875f..4f79e331af5ea 100644 --- a/arch/alpha/include/asm/tlb.h +++ b/arch/alpha/include/asm/tlb.h @@ -2,12 +2,6 @@ #ifndef _ALPHA_TLB_H #define _ALPHA_TLB_H -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) - -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h index 7af2b373ebe75..90cac97643a46 100644 --- a/arch/arc/include/asm/tlb.h +++ b/arch/arc/include/asm/tlb.h @@ -9,29 +9,6 @@ #ifndef _ASM_ARC_TLB_H #define _ASM_ARC_TLB_H -#define tlb_flush(tlb) \ -do { \ - if (tlb->fullmm) \ - flush_tlb_mm((tlb)->mm); \ -} while (0) - -/* - * This pair is called at time of munmap/exit to flush cache and TLB entries - * for mappings being torn down. 
- * 1) cache-flush part -implemented via tlb_start_vma( ) for VIPT aliasing D$ - * 2) tlb-flush part - implemted via tlb_end_vma( ) flushes the TLB range - * - * Note, read http://lkml.org/lkml/2004/1/15/6 - */ - -#define tlb_end_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - -#define __tlb_remove_tlb_entry(tlb, ptep, address) - #include #include diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index e5cd3c5f8399c..3bb75e674161c 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -20,6 +20,7 @@ config C6X select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA select ARCH_NO_COHERENT_DMA_MMAP + select MMU_GATHER_NO_RANGE if MMU config MMU def_bool n diff --git a/arch/c6x/include/asm/tlb.h b/arch/c6x/include/asm/tlb.h index 34525dea13566..240ba0febb57b 100644 --- a/arch/c6x/include/asm/tlb.h +++ b/arch/c6x/include/asm/tlb.h @@ -2,8 +2,6 @@ #ifndef _ASM_C6X_TLB_H #define _ASM_C6X_TLB_H -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #endif /* _ASM_C6X_TLB_H */ diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h index 98f344279904a..d8201ca312061 100644 --- a/arch/h8300/include/asm/tlb.h +++ b/arch/h8300/include/asm/tlb.h @@ -2,8 +2,6 @@ #ifndef __H8300_TLB_H__ #define __H8300_TLB_H__ -#define tlb_flush(tlb) do { } while (0) - #include #endif diff --git a/arch/hexagon/include/asm/tlb.h b/arch/hexagon/include/asm/tlb.h index 2f00772cc08a5..f71c4ba83614c 100644 --- a/arch/hexagon/include/asm/tlb.h +++ b/arch/hexagon/include/asm/tlb.h @@ -22,18 +22,6 @@ #include #include -/* - * We don't need any special per-pte or per-vma handling... - */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - -/* - * .. because we flush the whole mm when it fills up - */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #endif diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index b54206408f91b..4e37efbc92961 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -28,6 +28,7 @@ config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION select ARCH_DISCARD_MEMBLOCK + select MMU_GATHER_NO_RANGE if MMU config CPU_BIG_ENDIAN def_bool y diff --git a/arch/m68k/include/asm/tlb.h b/arch/m68k/include/asm/tlb.h index b4b9efb6f963b..3c81f6adfc8b3 100644 --- a/arch/m68k/include/asm/tlb.h +++ b/arch/m68k/include/asm/tlb.h @@ -2,20 +2,6 @@ #ifndef _M68K_TLB_H #define _M68K_TLB_H -/* - * m68k doesn't need any special per-pte or - * per-vma handling.. - */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - -/* - * .. because we flush the whole mm when it - * fills up. 
- */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #endif /* _M68K_TLB_H */ diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index a51b965b3b823..321e398ab6b51 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -41,6 +41,7 @@ config MICROBLAZE select TRACING_SUPPORT select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS + select MMU_GATHER_NO_RANGE if MMU # Endianness selection choice diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h index 99b6ded54849e..628a78ee0a720 100644 --- a/arch/microblaze/include/asm/tlb.h +++ b/arch/microblaze/include/asm/tlb.h @@ -11,16 +11,7 @@ #ifndef _ASM_MICROBLAZE_TLB_H #define _ASM_MICROBLAZE_TLB_H -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include - -#ifdef CONFIG_MMU -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) -#endif - #include #endif /* _ASM_MICROBLAZE_TLB_H */ diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h index 32b8a8187733d..90f3ad76d9e0b 100644 --- a/arch/mips/include/asm/tlb.h +++ b/arch/mips/include/asm/tlb.h @@ -5,14 +5,6 @@ #include #include -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - -/* - * .. because we flush the whole mm when it fills up. - */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #define _UNIQUE_ENTRYHI(base, idx) \ (((base) + ((idx) << (PAGE_SHIFT + 1))) | \ (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h index 0bf7c94823819..d5ae571c8d303 100644 --- a/arch/nds32/include/asm/tlb.h +++ b/arch/nds32/include/asm/tlb.h @@ -4,16 +4,6 @@ #ifndef __ASMNDS32_TLB_H #define __ASMNDS32_TLB_H -#define tlb_end_vma(tlb,vma) \ - do { \ - if(!tlb->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) - -#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) - -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 4ef15a61b7bc3..3633f81443678 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -24,6 +24,7 @@ config NIOS2 select USB_ARCH_HAS_HCD if USB_SUPPORT select CPU_NO_EFFICIENT_FFS select ARCH_DISCARD_MEMBLOCK + select MMU_GATHER_NO_RANGE if MMU config GENERIC_CSUM def_bool y diff --git a/arch/nios2/include/asm/tlb.h b/arch/nios2/include/asm/tlb.h index 9b518c6d0f622..f9f2e27e32dd5 100644 --- a/arch/nios2/include/asm/tlb.h +++ b/arch/nios2/include/asm/tlb.h @@ -11,12 +11,12 @@ #ifndef _ASM_NIOS2_TLB_H #define _ASM_NIOS2_TLB_H -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - extern void set_mmu_pid(unsigned long pid); -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +/* + * NIOS32 does have flush_tlb_range(), but it lacks a limit and fallback to + * full mm invalidation. So use flush_tlb_mm() for everything. 
+ */ #include #include diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index a5e361fbb75a0..c6cf8a49a0ab5 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -36,6 +36,7 @@ config OPENRISC select OMPIC if SMP select ARCH_WANT_FRAME_POINTERS select GENERIC_IRQ_MULTI_HANDLER + select MMU_GATHER_NO_RANGE if MMU config CPU_BIG_ENDIAN def_bool y diff --git a/arch/openrisc/include/asm/tlb.h b/arch/openrisc/include/asm/tlb.h index fa4376a4515d1..92d8a42098849 100644 --- a/arch/openrisc/include/asm/tlb.h +++ b/arch/openrisc/include/asm/tlb.h @@ -20,14 +20,10 @@ #define __ASM_OPENRISC_TLB_H__ /* - * or32 doesn't need any special per-pte or - * per-vma handling.. + * OpenRISC doesn't have an efficient flush_tlb_range() so use flush_tlb_mm() + * for everything. */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) #include #include diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h index b1984f9cd3af4..8c0446b04c9e1 100644 --- a/arch/parisc/include/asm/tlb.h +++ b/arch/parisc/include/asm/tlb.h @@ -2,19 +2,6 @@ #ifndef _PARISC_TLB_H #define _PARISC_TLB_H -#define tlb_flush(tlb) \ -do { if ((tlb)->fullmm) \ - flush_tlb_mm((tlb)->mm);\ -} while (0) - -#define tlb_end_vma(tlb, vma) \ -do { if (!(tlb)->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - -#define __tlb_remove_tlb_entry(tlb, pte, address) \ - do { } while (0) - #include #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h index 68d817273de87..5cd28a8793e39 100644 --- a/arch/sparc/include/asm/tlb_32.h +++ b/arch/sparc/include/asm/tlb_32.h @@ -2,19 +2,6 @@ #ifndef _SPARC_TLB_H #define _SPARC_TLB_H -#define tlb_end_vma(tlb, vma) \ -do { \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - -#define __tlb_remove_tlb_entry(tlb, pte, address) \ - do { } while (0) - -#define tlb_flush(tlb) \ -do { \ - flush_tlb_mm((tlb)->mm); \ -} while (0) - #include #endif /* _SPARC_TLB_H */ diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 817d82608712a..d83c8f70900d1 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -20,6 +20,7 @@ config UNICORE32 select GENERIC_IOMAP select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE + select MMU_GATHER_NO_RANGE if MMU help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip diff --git a/arch/unicore32/include/asm/tlb.h b/arch/unicore32/include/asm/tlb.h index 9cca15cdae94c..00a8477333f6d 100644 --- a/arch/unicore32/include/asm/tlb.h +++ b/arch/unicore32/include/asm/tlb.h @@ -12,10 +12,9 @@ #ifndef __UNICORE_TLB_H__ #define __UNICORE_TLB_H__ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) +/* + * unicore32 lacks an efficient flush_tlb_range(), use flush_tlb_mm(). 
+ */ #define __pte_free_tlb(tlb, pte, addr) \ do { \ diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h index 1a93e350382e7..50889935138ad 100644 --- a/arch/xtensa/include/asm/tlb.h +++ b/arch/xtensa/include/asm/tlb.h @@ -14,23 +14,6 @@ #include #include -#if (DCACHE_WAY_SIZE <= PAGE_SIZE) - -# define tlb_end_vma(tlb,vma) do { } while (0) - -#else - -# define tlb_end_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ - } while(0) - -#endif - -#define __tlb_remove_tlb_entry(tlb,pte,addr) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) -- GitLab From 952a31c9e6fa963eabf3692f31a769e59f4c8303 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 18 Sep 2018 14:51:50 +0200 Subject: [PATCH 0536/1861] asm-generic/tlb: Introduce CONFIG_HAVE_MMU_GATHER_NO_GATHER=y Add the Kconfig option HAVE_MMU_GATHER_NO_GATHER to the generic mmu_gather code. If the option is set the mmu_gather will not track individual pages for delayed page free anymore. A platform that enables the option needs to provide its own implementation of the __tlb_remove_page_size() function to free pages. No change in behavior intended. Signed-off-by: Martin Schwidefsky Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: aneesh.kumar@linux.vnet.ibm.com Cc: heiko.carstens@de.ibm.com Cc: linux@armlinux.org.uk Cc: npiggin@gmail.com Link: http://lkml.kernel.org/r/20180918125151.31744-2-schwidefsky@de.ibm.com Signed-off-by: Ingo Molnar --- arch/Kconfig | 3 ++ include/asm-generic/tlb.h | 9 +++- mm/mmu_gather.c | 107 +++++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 49 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 04b3e8b94cfe2..a826843470edb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -389,6 +389,9 @@ config HAVE_RCU_TABLE_NO_INVALIDATE config HAVE_MMU_GATHER_PAGE_SIZE bool +config HAVE_MMU_GATHER_NO_GATHER + bool + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 81799e6a43044..af20aa8255cde 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -191,6 +191,7 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #endif +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure. @@ -215,6 +216,10 @@ struct mmu_gather_batch { */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) +extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, + int page_size); +#endif + /* * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. 
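Put differently, a platform that selects HAVE_MMU_GATHER_NO_GATHER takes over page freeing entirely: it must provide __tlb_remove_page_size() itself. The following is a minimal editor's sketch of that hook, freeing each page immediately instead of batching it, much as the s390 conversion in the next patch does:

	static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
						  struct page *page, int page_size)
	{
		/* No gather list: release the page (and any swap cache
		 * reference it holds) right away. */
		free_page_and_swap_cache(page);

		/* Returning false tells the core mm never to force an early
		 * tlb_flush_mmu(); there is no batch left to overflow. */
		return false;
	}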
@@ -261,6 +266,7 @@ struct mmu_gather { unsigned int batch_count; +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; @@ -268,6 +274,7 @@ struct mmu_gather { #ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE unsigned int page_size; #endif +#endif }; void arch_tlb_gather_mmu(struct mmu_gather *tlb, @@ -276,8 +283,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); void tlb_flush_mmu_free(struct mmu_gather *tlb); -extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, - int page_size); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 2a5322d52b0aa..ab220edcd7ef5 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -13,6 +13,8 @@ #ifdef HAVE_GENERIC_MMU_GATHER +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + static bool tlb_next_batch(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; @@ -41,6 +43,56 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } +static void tlb_batch_pages_flush(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { + free_pages_and_swap_cache(batch->pages, batch->nr); + batch->nr = 0; + } + tlb->active = &tlb->local; +} + +static void tlb_batch_list_free(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch, *next; + + for (batch = tlb->local.next; batch; batch = next) { + next = batch->next; + free_pages((unsigned long)batch, 0); + } + tlb->local.next = NULL; +} + +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) +{ + struct mmu_gather_batch *batch; + + VM_BUG_ON(!tlb->end); + +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + VM_WARN_ON(tlb->page_size != page_size); +#endif + + batch = tlb->active; + /* + * Add the page and check if we are full. If so + * force a flush. + */ + batch->pages[batch->nr++] = page; + if (batch->nr == batch->max) { + if (!tlb_next_batch(tlb)) + return true; + batch = tlb->active; + } + VM_BUG_ON_PAGE(batch->nr > batch->max, page); + + return false; +} + +#endif /* HAVE_MMU_GATHER_NO_GATHER */ + void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { @@ -48,12 +100,15 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, /* Is it from 0 to ~0? 
*/ tlb->fullmm = !(start | (end+1)); + +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER tlb->need_flush_all = 0; tlb->local.next = NULL; tlb->local.nr = 0; tlb->local.max = ARRAY_SIZE(tlb->__pages); tlb->active = &tlb->local; tlb->batch_count = 0; +#endif #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb->batch = NULL; @@ -67,16 +122,12 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, void tlb_flush_mmu_free(struct mmu_gather *tlb) { - struct mmu_gather_batch *batch; - #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif - for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { - free_pages_and_swap_cache(batch->pages, batch->nr); - batch->nr = 0; - } - tlb->active = &tlb->local; +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + tlb_batch_pages_flush(tlb); +#endif } void tlb_flush_mmu(struct mmu_gather *tlb) @@ -92,8 +143,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force) { - struct mmu_gather_batch *batch, *next; - if (force) { __tlb_reset_range(tlb); __tlb_adjust_range(tlb, start, end - start); @@ -103,45 +152,9 @@ void arch_tlb_finish_mmu(struct mmu_gather *tlb, /* keep the page table cache within bounds */ check_pgt_cache(); - - for (batch = tlb->local.next; batch; batch = next) { - next = batch->next; - free_pages((unsigned long)batch, 0); - } - tlb->local.next = NULL; -} - -/* __tlb_remove_page - * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while - * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. Returns the number of free page slots left. - * When out of page slots we must call tlb_flush_mmu(). - *returns true if the caller should flush. - */ -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) -{ - struct mmu_gather_batch *batch; - - VM_BUG_ON(!tlb->end); - -#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE - VM_WARN_ON(tlb->page_size != page_size); +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + tlb_batch_list_free(tlb); #endif - - batch = tlb->active; - /* - * Add the page and check if we are full. If so - * force a flush. - */ - batch->pages[batch->nr++] = page; - if (batch->nr == batch->max) { - if (!tlb_next_batch(tlb)) - return true; - batch = tlb->active; - } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); - - return false; } #endif /* HAVE_GENERIC_MMU_GATHER */ -- GitLab From 9de7d833e3708213bf99d75c37483e0f773f5e16 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 18 Sep 2018 14:51:51 +0200 Subject: [PATCH 0537/1861] s390/tlb: Convert to generic mmu_gather No change in behavior intended. Signed-off-by: Martin Schwidefsky Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: aneesh.kumar@linux.vnet.ibm.com Cc: heiko.carstens@de.ibm.com Cc: linux@armlinux.org.uk Cc: npiggin@gmail.com Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/20180918125151.31744-3-schwidefsky@de.ibm.com Signed-off-by: Ingo Molnar --- arch/s390/Kconfig | 2 + arch/s390/include/asm/tlb.h | 128 +++++++++++------------------------- arch/s390/mm/pgalloc.c | 63 +----------------- 3 files changed, 42 insertions(+), 151 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b6e3d0653002a..cf06e313e1038 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -164,11 +164,13 @@ config S390 select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP + select HAVE_MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS + select HAVE_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 9941a1442a884..aa406c05a3505 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -22,98 +22,39 @@ * Pages used for the page tables is a different story. FIXME: more */ -#include -#include -#include -#include -#include -#include - -struct mmu_gather { - struct mm_struct *mm; - struct mmu_table_batch *batch; - unsigned int fullmm; - unsigned long start, end; -}; - -struct mmu_table_batch { - struct rcu_head rcu; - unsigned int nr; - void *tables[0]; -}; - -#define MAX_TABLE_BATCH \ - ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) - -extern void tlb_table_flush(struct mmu_gather *tlb); -extern void tlb_remove_table(struct mmu_gather *tlb, void *table); - -static inline void -arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - tlb->start = start; - tlb->end = end; - tlb->fullmm = !(start | (end+1)); - tlb->batch = NULL; -} - -static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) -{ - __tlb_flush_mm_lazy(tlb->mm); -} - -static inline void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - tlb_table_flush(tlb); -} - +void __tlb_remove_table(void *_table); +static inline void tlb_flush(struct mmu_gather *tlb); +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size); -static inline void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) -static inline void -arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - if (force) { - tlb->start = start; - tlb->end = end; - } +#define tlb_flush tlb_flush +#define pte_free_tlb pte_free_tlb +#define pmd_free_tlb pmd_free_tlb +#define p4d_free_tlb p4d_free_tlb +#define pud_free_tlb pud_free_tlb - tlb_flush_mmu(tlb); -} +#include +#include +#include /* * Release the page cache reference for a pte removed by * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. 
*/ -static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - free_page_and_swap_cache(page); - return false; /* avoid calling tlb_flush_mmu */ -} - -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - free_page_and_swap_cache(page); -} - static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { - return __tlb_remove_page(tlb, page); + free_page_and_swap_cache(page); + return false; } -static inline void tlb_remove_page_size(struct mmu_gather *tlb, - struct page *page, int page_size) +static inline void tlb_flush(struct mmu_gather *tlb) { - return tlb_remove_page(tlb, page); + __tlb_flush_mm_lazy(tlb->mm); } /* @@ -121,8 +62,17 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, * page table from the tlb. */ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, - unsigned long address) + unsigned long address) { + __tlb_adjust_range(tlb, address, PAGE_SIZE); + tlb->mm->context.flush_mm = 1; + tlb->freed_tables = 1; + tlb->cleared_ptes = 1; + /* + * page_table_free_rcu takes care of the allocation bit masks + * of the 2K table fragments in the 4K page table page, + * then calls tlb_remove_table. + */ page_table_free_rcu(tlb, (unsigned long *) pte, address); } @@ -139,6 +89,10 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, if (mm_pmd_folded(tlb->mm)) return; pgtable_pmd_page_dtor(virt_to_page(pmd)); + __tlb_adjust_range(tlb, address, PAGE_SIZE); + tlb->mm->context.flush_mm = 1; + tlb->freed_tables = 1; + tlb->cleared_puds = 1; tlb_remove_table(tlb, pmd); } @@ -154,6 +108,10 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, { if (mm_p4d_folded(tlb->mm)) return; + __tlb_adjust_range(tlb, address, PAGE_SIZE); + tlb->mm->context.flush_mm = 1; + tlb->freed_tables = 1; + tlb->cleared_p4ds = 1; tlb_remove_table(tlb, p4d); } @@ -169,19 +127,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + tlb->mm->context.flush_mm = 1; + tlb->freed_tables = 1; + tlb->cleared_puds = 1; tlb_remove_table(tlb, pud); } -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) -#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0) -#define tlb_migrate_finish(mm) do { } while (0) -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - tlb_remove_tlb_entry(tlb, ptep, address) - -static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) -{ -} #endif /* _S390_TLB_H */ diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index db6bb2f97a2c6..99e06213a22b7 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -290,7 +290,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, tlb_remove_table(tlb, table); } -static void __tlb_remove_table(void *_table) +void __tlb_remove_table(void *_table) { unsigned int mask = (unsigned long) _table & 3; void *table = (void *)((unsigned long) _table ^ mask); @@ -316,67 +316,6 @@ static void __tlb_remove_table(void *_table) } } -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. 
- * - * It is however sufficient for software page-table walkers that rely - * on IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - tlb->mm->context.flush_mm = 1; - if (*batch == NULL) { - *batch = (struct mmu_table_batch *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - __tlb_flush_mm_lazy(tlb->mm); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_flush_mmu(tlb); -} - /* * Base infrastructure required to generate basic asces, region, segment, * and page tables that do not make use of enhanced features like EDAT1. -- GitLab From 1808d65b55e4489770dd4f76fb0dff5b81eb9b11 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:50:11 +0200 Subject: [PATCH 0538/1861] asm-generic/tlb: Remove arch_tlb*_mmu() Now that all architectures are converted to the generic code, remove the arch hooks. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- mm/mmu_gather.c | 93 ++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 51 deletions(-) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index ab220edcd7ef5..60ef38d200c07 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -93,33 +93,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ #endif /* HAVE_MMU_GATHER_NO_GATHER */ -void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - - /* Is it from 0 to ~0? */ - tlb->fullmm = !(start | (end+1)); - -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER - tlb->need_flush_all = 0; - tlb->local.next = NULL; - tlb->local.nr = 0; - tlb->local.max = ARRAY_SIZE(tlb->__pages); - tlb->active = &tlb->local; - tlb->batch_count = 0; -#endif - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; -#endif -#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE - tlb->page_size = 0; -#endif - - __tlb_reset_range(tlb); -} - void tlb_flush_mmu_free(struct mmu_gather *tlb) { #ifdef CONFIG_HAVE_RCU_TABLE_FREE @@ -136,27 +109,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/* tlb_finish_mmu - * Called at the end of the shootdown operation to free up any resources - * that were required. 
- */ -void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - if (force) { - __tlb_reset_range(tlb); - __tlb_adjust_range(tlb, start, end - start); - } - - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER - tlb_batch_list_free(tlb); -#endif -} - #endif /* HAVE_GENERIC_MMU_GATHER */ #ifdef CONFIG_HAVE_RCU_TABLE_FREE @@ -258,10 +210,40 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { - arch_tlb_gather_mmu(tlb, mm, start, end); + tlb->mm = mm; + + /* Is it from 0 to ~0? */ + tlb->fullmm = !(start | (end+1)); + +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + tlb->need_flush_all = 0; + tlb->local.next = NULL; + tlb->local.nr = 0; + tlb->local.max = ARRAY_SIZE(tlb->__pages); + tlb->active = &tlb->local; + tlb->batch_count = 0; +#endif + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + tlb->page_size = 0; +#endif + + __tlb_reset_range(tlb); inc_tlb_flush_pending(tlb->mm); } +/** + * tlb_finish_mmu - finish an mmu_gather structure + * @tlb: the mmu_gather structure to finish + * @start: start of the region that will be removed from the page-table + * @end: end of the region that will be removed from the page-table + * + * Called at the end of the shootdown operation to free up any resources that + * were required. + */ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { @@ -272,8 +254,17 @@ void tlb_finish_mmu(struct mmu_gather *tlb, * the TLB by observing pte_none|!pte_dirty, for example so flush TLB * forcefully if we detect parallel PTE batching threads. */ - bool force = mm_tlb_flush_nested(tlb->mm); + if (mm_tlb_flush_nested(tlb->mm)) { + __tlb_reset_range(tlb); + __tlb_adjust_range(tlb, start, end - start); + } - arch_tlb_finish_mmu(tlb, start, end, force); + tlb_flush_mmu(tlb); + + /* keep the page table cache within bounds */ + check_pgt_cache(); +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + tlb_batch_list_free(tlb); +#endif dec_tlb_flush_pending(tlb->mm); } -- GitLab From b3fa8ed4e48802e6ba0aa5f3283313a27dcbf46f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:51:01 +0200 Subject: [PATCH 0539/1861] asm-generic/tlb: Remove CONFIG_HAVE_GENERIC_MMU_GATHER Since all architectures are now using it, it is redundant. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 1 - mm/mmu_gather.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index af20aa8255cde..2648a02a6b1be 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -146,7 +146,6 @@ * * Use this if your architecture lacks an efficient flush_tlb_range(). 
*/ -#define HAVE_GENERIC_MMU_GATHER #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 60ef38d200c07..7f5b2b8aa9dd4 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -11,8 +11,6 @@ #include #include -#ifdef HAVE_GENERIC_MMU_GATHER - #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER static bool tlb_next_batch(struct mmu_gather *tlb) @@ -109,8 +107,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -#endif /* HAVE_GENERIC_MMU_GATHER */ - #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* -- GitLab From fa0aafb8acb684e68231ff0a547ed249f8dc31a5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:54:04 +0200 Subject: [PATCH 0540/1861] asm-generic/tlb: Remove tlb_flush_mmu_free() As the comment notes; it is a potentially dangerous operation. Just use tlb_flush_mmu(), that will skip the (double) TLB invalidate if it really isn't needed anyway. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 10 +++------- mm/memory.c | 2 +- mm/mmu_gather.c | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2648a02a6b1be..ddd3d02be93d2 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -67,16 +67,13 @@ * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. * - * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() / tlb_flush_mmu_free() + * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() * * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets * related state, like the range) * - * tlb_flush_mmu_free() - frees the queued pages; make absolutely - * sure no additional tlb_remove_page() - * calls happen between _tlbonly() and this. - * - * tlb_flush_mmu() - the above two calls. + * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees + * whatever pages are still batched. * * - mmu_gather::fullmm * @@ -281,7 +278,6 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); -void tlb_flush_mmu_free(struct mmu_gather *tlb); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, diff --git a/mm/memory.c b/mm/memory.c index 1aa5c03566f11..36aac68446627 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1155,7 +1155,7 @@ again: */ if (force_flush) { force_flush = 0; - tlb_flush_mmu_free(tlb); + tlb_flush_mmu(tlb); if (addr != end) goto again; } diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 7f5b2b8aa9dd4..35699a4d0a740 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -91,7 +91,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ #endif /* HAVE_MMU_GATHER_NO_GATHER */ -void tlb_flush_mmu_free(struct mmu_gather *tlb) +static void tlb_flush_mmu_free(struct mmu_gather *tlb) { #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); -- GitLab From 0a8caf211bcf52cbb59e100ead4908fe88d2a510 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:55:10 +0200 Subject: [PATCH 0541/1861] asm-generic/tlb: Remove tlb_table_flush() There are no external users of this API (nor should there be); remove it. 
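For orientation, what this leg of the series converges on: tlb_gather_mmu() and tlb_finish_mmu() become the only entry points, with the page batching, the RCU table freeing and the final flush all behind them. A minimal sketch of the caller pattern at this point in the series -- zap_region() is a hypothetical helper, and unmap_page_range() (the mm-internal page-table walk) stands in for the real zap loop:

    #include <asm/tlb.h>

    /* Sketch only: VMA iteration and error handling omitted. */
    static void zap_region(struct mm_struct *mm, struct vm_area_struct *vma,
                           unsigned long start, unsigned long end)
    {
            struct mmu_gather tlb;

            tlb_gather_mmu(&tlb, mm, start, end);   /* init state, inc_tlb_flush_pending() */
            unmap_page_range(&tlb, vma, start, end, NULL); /* pages queue up via __tlb_remove_page_size() */
            tlb_finish_mmu(&tlb, start, end);       /* TLB invalidate, then free the batched pages */
    }
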
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 1 - mm/mmu_gather.c | 34 +++++++++++++++++----------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ddd3d02be93d2..fc661b4675ffa 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -182,7 +182,6 @@ struct mmu_table_batch { #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) -extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #endif diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 35699a4d0a740..99740e1dd2730 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -91,22 +91,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ #endif /* HAVE_MMU_GATHER_NO_GATHER */ -static void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif -#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER - tlb_batch_pages_flush(tlb); -#endif -} - -void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* @@ -159,7 +143,7 @@ static void tlb_remove_table_rcu(struct rcu_head *head) free_page((unsigned long)batch); } -void tlb_table_flush(struct mmu_gather *tlb) +static void tlb_table_flush(struct mmu_gather *tlb) { struct mmu_table_batch **batch = &tlb->batch; @@ -191,6 +175,22 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +static void tlb_flush_mmu_free(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER + tlb_batch_pages_flush(tlb); +#endif +} + +void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} + /** * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down * @tlb: the mmu_gather structure to initialize -- GitLab From 6455959819bf2469190ae9f6b4ccebaa9827e884 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Feb 2019 14:38:37 +0100 Subject: [PATCH 0542/1861] ia64/tlb: Eradicate tlb_migrate_finish() callback Only ia64-sn2 uses this as an optimization, and there it is of questionable correctness due to the mm_users==1 test. Remove it entirely. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- Documentation/core-api/cachetlb.rst | 10 ---------- arch/ia64/include/asm/machvec.h | 13 ------------- arch/ia64/include/asm/machvec_sn2.h | 2 -- arch/ia64/include/asm/tlb.h | 2 -- arch/ia64/sn/kernel/sn2/sn2_smp.c | 7 ------- arch/nds32/include/asm/tlbflush.h | 1 - include/asm-generic/tlb.h | 4 ---- kernel/sched/core.c | 1 - 8 files changed, 40 deletions(-) diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index 6eb9d3f090cdf..93cb65d52720a 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -101,16 +101,6 @@ changes occur: translations for software managed TLB configurations. The sparc64 port currently does this. -6) ``void tlb_migrate_finish(struct mm_struct *mm)`` - - This interface is called at the end of an explicit - process migration. This interface provides a hook - to allow a platform to update TLB or context-specific - information for the address space. - - The ia64 sn2 platform is one example of a platform - that uses this interface. - Next, we have the cache flushing interfaces. In general, when Linux is changing an existing virtual-->physical mapping to a new value, the sequence will be in one of the following forms:: diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 5133739966bcf..beae261fbcb41 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -30,7 +30,6 @@ typedef void ia64_mv_irq_init_t (void); typedef void ia64_mv_send_ipi_t (int, int, int, int); typedef void ia64_mv_timer_interrupt_t (int, void *); typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long); -typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *); typedef u8 ia64_mv_irq_to_vector (int); typedef unsigned int ia64_mv_local_vector_to_irq (u8); typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *); @@ -79,11 +78,6 @@ machvec_noop (void) { } -static inline void -machvec_noop_mm (struct mm_struct *mm) -{ -} - static inline void machvec_noop_task (struct task_struct *task) { @@ -96,7 +90,6 @@ machvec_noop_bus (struct pci_bus *bus) extern void machvec_setup (char **); extern void machvec_timer_interrupt (int, void *); -extern void machvec_tlb_migrate_finish (struct mm_struct *); # if defined (CONFIG_IA64_HP_SIM) # include @@ -124,7 +117,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_send_ipi ia64_mv.send_ipi # define platform_timer_interrupt ia64_mv.timer_interrupt # define platform_global_tlb_purge ia64_mv.global_tlb_purge -# define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish # define platform_dma_init ia64_mv.dma_init # define platform_dma_get_ops ia64_mv.dma_get_ops # define platform_irq_to_vector ia64_mv.irq_to_vector @@ -167,7 +159,6 @@ struct ia64_machine_vector { ia64_mv_send_ipi_t *send_ipi; ia64_mv_timer_interrupt_t *timer_interrupt; ia64_mv_global_tlb_purge_t *global_tlb_purge; - ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish; ia64_mv_dma_init *dma_init; ia64_mv_dma_get_ops *dma_get_ops; ia64_mv_irq_to_vector *irq_to_vector; @@ -206,7 +197,6 @@ struct ia64_machine_vector { platform_send_ipi, \ platform_timer_interrupt, \ platform_global_tlb_purge, \ - platform_tlb_migrate_finish, \ platform_dma_init, \ platform_dma_get_ops, \ platform_irq_to_vector, \ @@ -270,9 +260,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device 
*); #ifndef platform_global_tlb_purge # define platform_global_tlb_purge ia64_global_tlb_purge /* default to architected version */ #endif -#ifndef platform_tlb_migrate_finish -# define platform_tlb_migrate_finish machvec_noop_mm -#endif #ifndef platform_kernel_launch_event # define platform_kernel_launch_event machvec_noop #endif diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h index b5153d3002897..a243e4fb4877d 100644 --- a/arch/ia64/include/asm/machvec_sn2.h +++ b/arch/ia64/include/asm/machvec_sn2.h @@ -34,7 +34,6 @@ extern ia64_mv_irq_init_t sn_irq_init; extern ia64_mv_send_ipi_t sn2_send_IPI; extern ia64_mv_timer_interrupt_t sn_timer_interrupt; extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge; -extern ia64_mv_tlb_migrate_finish_t sn_tlb_migrate_finish; extern ia64_mv_irq_to_vector sn_irq_to_vector; extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem; @@ -77,7 +76,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_send_ipi sn2_send_IPI #define platform_timer_interrupt sn_timer_interrupt #define platform_global_tlb_purge sn2_global_tlb_purge -#define platform_tlb_migrate_finish sn_tlb_migrate_finish #define platform_pci_fixup sn_pci_fixup #define platform_inb __sn_inb #define platform_inw __sn_inw diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 849fab9ccb333..86ec034ba4991 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -47,8 +47,6 @@ #include #include -#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm) - #include #endif /* _ASM_IA64_TLB_H */ diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index b73b0ebf82148..b510f4f17fd46 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -120,13 +120,6 @@ void sn_migrate(struct task_struct *task) cpu_relax(); } -void sn_tlb_migrate_finish(struct mm_struct *mm) -{ - /* flush_tlb_mm is inefficient if more than 1 users of mm */ - if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1) - flush_tlb_mm(mm); -} - static void sn2_ipi_flush_all_tlb(struct mm_struct *mm) { diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h index 9b411f4019036..38ee769b18d8a 100644 --- a/arch/nds32/include/asm/tlbflush.h +++ b/arch/nds32/include/asm/tlbflush.h @@ -42,6 +42,5 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * pte); -void tlb_migrate_finish(struct mm_struct *mm); #endif diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index fc661b4675ffa..b9edc7608d901 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -604,8 +604,4 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #endif /* CONFIG_MMU */ -#ifndef tlb_migrate_finish -#define tlb_migrate_finish(mm) do {} while (0) -#endif - #endif /* _ASM_GENERIC__TLB_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4778c48a7fda4..ade3f2287d1f6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1151,7 +1151,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, /* Need help from migration thread: drop lock and wait. 
*/ task_rq_unlock(rq, p, &rf); stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); - tlb_migrate_finish(p->mm); return 0; } else if (task_on_rq_queued(p)) { /* -- GitLab From e74deb11931ff682b59d5b9d387f7115f689698e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Apr 2019 09:39:48 +0200 Subject: [PATCH 0543/1861] x86/uaccess: Introduce user_access_{save,restore}() Introduce common helpers for when we need to safely suspend a uaccess section; for instance to generate a {KA,UB}SAN report. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smap.h | 20 ++++++++++++++++++++ arch/x86/include/asm/uaccess.h | 3 +++ include/linux/uaccess.h | 2 ++ 3 files changed, 25 insertions(+) diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index db333300bd4be..6cfe431710203 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -58,6 +58,23 @@ static __always_inline void stac(void) alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); } +static __always_inline unsigned long smap_save(void) +{ + unsigned long flags; + + asm volatile (ALTERNATIVE("", "pushf; pop %0; " __stringify(__ASM_CLAC), + X86_FEATURE_SMAP) + : "=rm" (flags) : : "memory", "cc"); + + return flags; +} + +static __always_inline void smap_restore(unsigned long flags) +{ + asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) + : : "g" (flags) : "memory", "cc"); +} + /* These macros can be used in asm() statements */ #define ASM_CLAC \ ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) @@ -69,6 +86,9 @@ static __always_inline void stac(void) static inline void clac(void) { } static inline void stac(void) { } +static inline unsigned long smap_save(void) { return 0; } +static inline void smap_restore(unsigned long flags) { } + #define ASM_CLAC #define ASM_STAC diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index ae5783b5fab01..5ca7b91faf67c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -715,6 +715,9 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() __uaccess_end() +#define user_access_save() smap_save() +#define user_access_restore(x) smap_restore(x) + #define unsafe_put_user(x, ptr, label) \ __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 37b226e8df13f..2b70130af5857 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -268,6 +268,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #define user_access_end() do { } while (0) #define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0) #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) +static inline unsigned long user_access_save(void) { return 0UL; } +static inline void user_access_restore(unsigned long flags) { } #endif #ifdef CONFIG_HARDENED_USERCOPY -- GitLab From a936af8ea3580855adcc80daa8e01c0196afeb15 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Mar 2019 14:44:44 +0100 Subject: [PATCH 0544/1861] x86/smap: Ditch __stringify() Linus noticed all users of __ASM_STAC/__ASM_CLAC are with __stringify(). Just make them a string. 
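The mechanics, for reference: as bare assembler tokens the opcode macros could only reach C code via __stringify(), which turns the expansion into a string literal; once the macros are defined as string literals they paste directly. A before/after sketch of the same call site (two snapshots of one macro, not a single compilable unit):

    /* before: bare tokens, usable from C only through __stringify() */
    #define __ASM_CLAC      .byte 0x0f,0x01,0xca
    alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);

    /* after: already a string literal, adjacent-literal pasting just works */
    #define __ASM_CLAC      ".byte 0x0f,0x01,0xca"
    alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
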
Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smap.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 6cfe431710203..f94a7d0ddd490 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -13,13 +13,12 @@ #ifndef _ASM_X86_SMAP_H #define _ASM_X86_SMAP_H -#include #include #include /* "Raw" instruction opcodes */ -#define __ASM_CLAC .byte 0x0f,0x01,0xca -#define __ASM_STAC .byte 0x0f,0x01,0xcb +#define __ASM_CLAC ".byte 0x0f,0x01,0xca" +#define __ASM_STAC ".byte 0x0f,0x01,0xcb" #ifdef __ASSEMBLY__ @@ -28,10 +27,10 @@ #ifdef CONFIG_X86_SMAP #define ASM_CLAC \ - ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP + ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP + ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP #else /* CONFIG_X86_SMAP */ @@ -49,20 +48,20 @@ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP); + alternative("", __ASM_CLAC, X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); + alternative("", __ASM_STAC, X86_FEATURE_SMAP); } static __always_inline unsigned long smap_save(void) { unsigned long flags; - asm volatile (ALTERNATIVE("", "pushf; pop %0; " __stringify(__ASM_CLAC), + asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC, X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); @@ -77,9 +76,9 @@ static __always_inline void smap_restore(unsigned long flags) /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) + ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP) + ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP) #else /* CONFIG_X86_SMAP */ -- GitLab From 57b78a62e7f23c4686fe54091cdc3d12e60d6513 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Apr 2019 09:39:50 +0200 Subject: [PATCH 0545/1861] x86/uaccess, kasan: Fix KASAN vs SMAP KASAN inserts extra code for every LOAD/STORE emitted by the compiler. Much of this code is simple and safe to run with AC=1, however the kasan_report() function, called on error, is most certainly not safe to call with AC=1. Therefore wrap kasan_report() in user_access_{save,restore}; which for x86 SMAP, saves/restores EFLAGS and clears AC before calling the real function. Also ensure all the functions are without __fentry__ hook. The function tracer is also not safe. 
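The wrapping scheme below generalizes to any callback that can fire inside an AC=1 region; a hedged sketch of the shape, where report_hook()/__report_hook() are hypothetical stand-ins for kasan_report()/__kasan_report() and the helpers are the ones introduced two patches earlier:

    #include <linux/uaccess.h>

    extern void __report_hook(unsigned long addr);    /* the real, AC-unsafe worker */

    void report_hook(unsigned long addr)
    {
            unsigned long flags = user_access_save(); /* x86/SMAP: save EFLAGS, clear AC */

            __report_hook(addr);                      /* may now call/trace freely */

            user_access_restore(flags);               /* put AC back the way it was */
    }
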
Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- mm/kasan/Makefile | 3 +++ mm/kasan/common.c | 10 ++++++++++ mm/kasan/report.c | 3 +-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 5d1065efbd476..613dfe681e9fc 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -2,11 +2,13 @@ KASAN_SANITIZE := n UBSAN_SANITIZE_common.o := n UBSAN_SANITIZE_generic.o := n +UBSAN_SANITIZE_generic_report.o := n UBSAN_SANITIZE_tags.o := n KCOV_INSTRUMENT := n CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_generic.o = -pg +CFLAGS_REMOVE_generic_report.o = -pg CFLAGS_REMOVE_tags.o = -pg # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 @@ -14,6 +16,7 @@ CFLAGS_REMOVE_tags.o = -pg CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) obj-$(CONFIG_KASAN) := common.o init.o report.o diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 80bbe62b16cd2..09c5864745116 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "kasan.h" #include "../slab.h" @@ -614,6 +615,15 @@ void kasan_free_shadow(const struct vm_struct *vm) vfree(kasan_mem_to_shadow(vm->addr)); } +extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); + +void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) +{ + unsigned long flags = user_access_save(); + __kasan_report(addr, size, is_write, ip); + user_access_restore(flags); +} + #ifdef CONFIG_MEMORY_HOTPLUG static bool shadow_mapped(unsigned long addr) { diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ca9418fe9232a..0772820ad0988 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -281,8 +281,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip) end_report(&flags); } -void kasan_report(unsigned long addr, size_t size, - bool is_write, unsigned long ip) +void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { struct kasan_access_info info; void *tagged_addr; -- GitLab From d08965a27e84ca090b504844d50c24fc98587b11 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Apr 2019 09:40:16 +0200 Subject: [PATCH 0546/1861] x86/uaccess, ubsan: Fix UBSAN vs. SMAP UBSAN can insert extra code in random locations; including AC=1 sections. Typically this code is not safe and needs wrapping. So far, only __ubsan_handle_type_mismatch* have been observed in AC=1 sections and therefore only those are annotated. 
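To see how such handlers end up running with AC=1 in the first place: any instrumented access between user_access_begin() and user_access_end() sits inside the STAC/CLAC window, and the compiler is free to drop a __ubsan_handle_type_mismatch*() call right there. A sketch of such a window, assuming the x86 unsafe_* helpers (put_val() is a hypothetical caller):

    #include <linux/uaccess.h>

    static int put_val(int __user *uptr, int val)
    {
            if (!user_access_begin(uptr, sizeof(*uptr)))
                    return -EFAULT;
            unsafe_put_user(val, uptr, Efault); /* UBSAN may instrument this store */
            user_access_end();
            return 0;
    Efault:
            user_access_end();                  /* fault path still has AC=1 to undo */
            return -EFAULT;
    }
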
Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- lib/Makefile | 1 + lib/ubsan.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/lib/Makefile b/lib/Makefile index 3b08673e8881a..c4563e7c47ba8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -268,6 +268,7 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o obj-$(CONFIG_UBSAN) += ubsan.o UBSAN_SANITIZE_ubsan.o := n +CFLAGS_ubsan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) obj-$(CONFIG_SBITMAP) += sbitmap.o diff --git a/lib/ubsan.c b/lib/ubsan.c index e4162f59a81cc..c8e905bfb627e 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "ubsan.h" @@ -313,6 +314,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data_common *data, static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { + unsigned long flags = user_access_save(); if (!ptr) handle_null_ptr_deref(data); @@ -320,6 +322,8 @@ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); + + user_access_restore(flags); } void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, -- GitLab From 4a6c91fbdef846ec7250b82f2eeeb87ac5f18cf9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Mar 2019 11:09:13 +0100 Subject: [PATCH 0547/1861] x86/uaccess, ftrace: Fix ftrace_likely_update() vs. SMAP For CONFIG_TRACE_BRANCH_PROFILING=y the likely/unlikely things get overloaded and generate callouts to this code, and thus also when AC=1. Make it safe. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/trace/trace_branch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 4ad967453b6fb..3ea65cdff30d5 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -205,6 +205,8 @@ void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect, int is_constant) { + unsigned long flags = user_access_save(); + /* A constant is always correct */ if (is_constant) { f->constant++; @@ -223,6 +225,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, f->data.correct++; else f->data.incorrect++; + + user_access_restore(flags); } EXPORT_SYMBOL(ftrace_likely_update); -- GitLab From 40ea97290b08be2e038b31cbb33097d1145e8169 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Mar 2019 19:54:25 +0100 Subject: [PATCH 0548/1861] x86/uaccess, kcov: Disable stack protector New tooling noticed this mishap: kernel/kcov.o: warning: objtool: write_comp_data()+0x138: call to __stack_chk_fail() with UACCESS enabled kernel/kcov.o: warning: objtool: __sanitizer_cov_trace_pc()+0xd9: call to __stack_chk_fail() with UACCESS enabled All the other instrumentation (KASAN,UBSAN) also have stack protector disabled. 
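For context, the flagged calls come from the stack-protector epilogue check, which is an ordinary CALL that can land inside an AC=1 region. A conceptual sketch only -- instrumented_hook() is hypothetical, and on x86-64 the canary really lives in %gs-relative storage rather than a plain global:

    extern void __stack_chk_fail(void);
    unsigned long __stack_chk_guard;            /* per-task canary, simplified */

    void instrumented_hook(void)
    {
            unsigned long canary = __stack_chk_guard;   /* prologue: load canary */
            char buf[16];

            buf[0] = 0;     /* body: anything canary-worthy; may run with AC=1 */

            if (canary != __stack_chk_guard)
                    __stack_chk_fail();     /* epilogue CALL -- the warning above */
    }
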
Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/Makefile b/kernel/Makefile index 6c57e78817dad..62471e75a2b0a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -30,6 +30,7 @@ KCOV_INSTRUMENT_extable.o := n # Don't self-instrument. KCOV_INSTRUMENT_kcov.o := n KASAN_SANITIZE_kcov.o := n +CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) # cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO) -- GitLab From a4d09dde9093a04a9b48fb9e5ef3177bdfaff199 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2019 10:31:24 +0100 Subject: [PATCH 0549/1861] objtool: Set insn->func for alternatives In preparation of function attributes, we need each instruction to have a valid link back to its function. Therefore make sure we set the function association for alternative instruction sequences; they are, after all, still part of the function. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 110ea3d847724..950d0f62d22b3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -695,6 +695,7 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; insn->ignore = orig_insn->ignore_alts; + insn->func = orig_insn->func; if (insn->type != INSN_JUMP_CONDITIONAL && insn->type != INSN_JUMP_UNCONDITIONAL) -- GitLab From 09f30d83d33029faf6377a86f5ae80a658af9254 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Feb 2019 14:17:50 +0100 Subject: [PATCH 0550/1861] objtool: Handle function aliases Function aliases result in different symbols for the same set of instructions; track a canonical symbol so there is a unique point of access. This again prepares the way for function attributes. And in particular the need for aliases comes from how KASAN uses them. 
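The KASAN usage in question is the __alias() function attribute: each __asan_load*()/__asan_store*() gets a *_noabort twin that is a second ELF symbol over the same instructions, which is exactly the case the canonical-symbol tracking below has to handle. A sketch of the pattern, simplified from the DEFINE_ASAN_LOAD_STORE() macro in mm/kasan/generic.c:

    void __asan_load4(unsigned long addr)
    {
            check_memory_region(addr, 4, false, _RET_IP_);
    }
    EXPORT_SYMBOL(__asan_load4);

    __alias(__asan_load4)
    void __asan_load4_noabort(unsigned long);   /* same code, second symbol */
    EXPORT_SYMBOL(__asan_load4_noabort);
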
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/elf.c | 15 +++++++++++---- tools/objtool/elf.h | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index b8f3cca8e58b4..dd198d53387df 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -219,7 +219,7 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { struct section *symtab, *sec; - struct symbol *sym, *pfunc; + struct symbol *sym, *pfunc, *alias; struct list_head *entry, *tmp; int symbols_nr, i; char *coldstr; @@ -239,6 +239,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + alias = sym; sym->idx = i; @@ -288,11 +289,17 @@ static int read_symbols(struct elf *elf) break; } - if (sym->offset == s->offset && sym->len >= s->len) { - entry = tmp; - break; + if (sym->offset == s->offset) { + if (sym->len == s->len && alias == sym) + alias = s; + + if (sym->len >= s->len) { + entry = tmp; + break; + } } } + sym->alias = alias; list_add(&sym->list, entry); hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); } diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index bc97ed86b9cd8..968265b4b4cd8 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -61,7 +61,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; - struct symbol *pfunc, *cfunc; + struct symbol *pfunc, *cfunc, *alias; }; struct rela { -- GitLab From aaf5c623b915d64beba676b8c2e9708d1fda94d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Feb 2019 14:04:13 +0100 Subject: [PATCH 0551/1861] objtool: Rewrite add_ignores() The whole add_ignores() thing was wildly weird; rewrite it according to 'modern' ways. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 51 +++++++++++++++++-------------------------- tools/objtool/check.h | 1 - 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 950d0f62d22b3..8d8191f25381b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -104,29 +104,6 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) -/* - * Check if the function has been manually whitelisted with the - * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted - * due to its use of a context switching instruction. - */ -static bool ignore_func(struct objtool_file *file, struct symbol *func) -{ - struct rela *rela; - - /* check for STACK_FRAME_NON_STANDARD */ - if (file->whitelist && file->whitelist->rela) - list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { - if (rela->sym->type == STT_SECTION && - rela->sym->sec == func->sec && - rela->addend == func->offset) - return true; - if (rela->sym->type == STT_FUNC && rela->sym == func) - return true; - } - - return false; -} - /* * This checks to see if the given function is a "noreturn" function. 
* @@ -436,18 +413,31 @@ static void add_ignores(struct objtool_file *file) struct instruction *insn; struct section *sec; struct symbol *func; + struct rela *rela; - for_each_sec(file, sec) { - list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) - continue; + sec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard"); + if (!sec) + return; - if (!ignore_func(file, func)) + list_for_each_entry(rela, &sec->rela_list, list) { + switch (rela->sym->type) { + case STT_FUNC: + func = rela->sym; + break; + + case STT_SECTION: + func = find_symbol_by_offset(rela->sym->sec, rela->addend); + if (!func || func->type != STT_FUNC) continue; + break; - func_for_each_insn_all(file, func, insn) - insn->ignore = true; + default: + WARN("unexpected relocation symbol type in %s: %d", sec->name, rela->sym->type); + continue; } + + func_for_each_insn_all(file, func, insn) + insn->ignore = true; } } @@ -2199,7 +2189,6 @@ int check(const char *_objname, bool orc) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); file.c_file = find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index e6e8a655b5563..d8896eb435212 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -60,7 +60,6 @@ struct objtool_file { struct elf *elf; struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 16); - struct section *whitelist; bool ignore_unreachables, c_file, hints, rodata; }; -- GitLab From 7697eee3ddd768a1fd78c1e687afaa6c5aa5072d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 Mar 2019 11:15:49 +0100 Subject: [PATCH 0552/1861] objtool: Add --backtrace support For when you want to know the path that reached your fail state: $ ./objtool check --no-fp --backtrace arch/x86/lib/usercopy_64.o arch/x86/lib/usercopy_64.o: warning: objtool: .altinstr_replacement+0x3: UACCESS disable without MEMOPs: __clear_user() arch/x86/lib/usercopy_64.o: warning: objtool: __clear_user()+0x3a: (alt) arch/x86/lib/usercopy_64.o: warning: objtool: __clear_user()+0x2e: (branch) arch/x86/lib/usercopy_64.o: warning: objtool: __clear_user()+0x18: (branch) arch/x86/lib/usercopy_64.o: warning: objtool: .altinstr_replacement+0xffffffffffffffff: (branch) arch/x86/lib/usercopy_64.o: warning: objtool: __clear_user()+0x5: (alt) arch/x86/lib/usercopy_64.o: warning: objtool: __clear_user()+0x0: <=== (func) 0000000000000000 <__clear_user>: 0: e8 00 00 00 00 callq 5 <__clear_user+0x5> 1: R_X86_64_PLT32 __fentry__-0x4 5: 90 nop 6: 90 nop 7: 90 nop 8: 48 89 f0 mov %rsi,%rax b: 48 c1 ee 03 shr $0x3,%rsi f: 83 e0 07 and $0x7,%eax 12: 48 89 f1 mov %rsi,%rcx 15: 48 85 c9 test %rcx,%rcx 18: 74 0f je 29 <__clear_user+0x29> 1a: 48 c7 07 00 00 00 00 movq $0x0,(%rdi) 21: 48 83 c7 08 add $0x8,%rdi 25: ff c9 dec %ecx 27: 75 f1 jne 1a <__clear_user+0x1a> 29: 48 89 c1 mov %rax,%rcx 2c: 85 c9 test %ecx,%ecx 2e: 74 0a je 3a <__clear_user+0x3a> 30: c6 07 00 movb $0x0,(%rdi) 33: 48 ff c7 inc %rdi 36: ff c9 dec %ecx 38: 75 f6 jne 30 <__clear_user+0x30> 3a: 90 nop 3b: 90 nop 3c: 90 nop 3d: 48 89 c8 mov %rcx,%rax 40: c3 retq Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 3 ++- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 18 
++++++++++++++---- tools/objtool/warn.h | 8 ++++++++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 694abc628e9b3..99f10c585cbee 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -29,7 +29,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable, retpoline, module; +bool no_fp, no_unreachable, retpoline, module, backtrace; static const char * const check_usage[] = { "objtool check [] file.o", @@ -41,6 +41,7 @@ const struct option check_options[] = { OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 28ff40e19a141..65fd3cc3c98b5 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -20,7 +20,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module; +extern bool no_fp, no_unreachable, retpoline, module, backtrace; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8d8191f25381b..ccc66af5907f5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1885,8 +1885,11 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (!insn->ignore_alts) { list_for_each_entry(alt, &insn->alts, list) { ret = validate_branch(file, alt->insn, state); - if (ret) - return 1; + if (ret) { + if (backtrace) + BT_FUNC("(alt)", insn); + return ret; + } } } @@ -1933,8 +1936,11 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, insn->jump_dest->func->pfunc == func)) { ret = validate_branch(file, insn->jump_dest, state); - if (ret) - return 1; + if (ret) { + if (backtrace) + BT_FUNC("(branch)", insn); + return ret; + } } else if (func && has_modified_stack_frame(&state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", @@ -2005,6 +2011,8 @@ static int validate_unwind_hints(struct objtool_file *file) for_each_insn(file, insn) { if (insn->hint && !insn->visited) { ret = validate_branch(file, insn, state); + if (ret && backtrace) + BT_FUNC("<=== (hint)", insn); warnings += ret; } } @@ -2133,6 +2141,8 @@ static int validate_functions(struct objtool_file *file) continue; ret = validate_branch(file, insn, state); + if (ret && backtrace) + BT_FUNC("<=== (func)", insn); warnings += ret; } } diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h index afd9f7a05f6d1..f4fbb972b611c 100644 --- a/tools/objtool/warn.h +++ b/tools/objtool/warn.h @@ -64,6 +64,14 @@ static inline char *offstr(struct section *sec, unsigned long offset) free(_str); \ }) +#define BT_FUNC(format, insn, ...) \ +({ \ + struct instruction *_insn = (insn); \ + char *_str = offstr(_insn->sec, _insn->offset); \ + WARN(" %s: " format, _str, ##__VA_ARGS__); \ + free(_str); \ +}) + #define WARN_ELF(format, ...) 
\ WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) -- GitLab From 764eef4b109ae11e6c987de9c14fc7c482041be0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 Mar 2019 11:19:03 +0100 Subject: [PATCH 0553/1861] objtool: Rewrite alt->skip_orig Really skip the original instruction flow, instead of letting it continue with NOPs. Since the alternative code flow already continues after the original instructions, only the alt-original is skipped. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ccc66af5907f5..5264a305d6583 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -31,6 +31,7 @@ struct alternative { struct list_head list; struct instruction *insn; + bool skip_orig; }; const char *objname; @@ -623,9 +624,6 @@ static int add_call_destinations(struct objtool_file *file) * conditionally jumps to the _end_ of the entry. We have to modify these * jumps' destinations to point back to .text rather than the end of the * entry in .altinstr_replacement. - * - * 4. It has been requested that we don't validate the !POPCNT feature path - * which is a "very very small percentage of machines". */ static int handle_group_alt(struct objtool_file *file, struct special_alt *special_alt, @@ -641,9 +639,6 @@ static int handle_group_alt(struct objtool_file *file, if (insn->offset >= special_alt->orig_off + special_alt->orig_len) break; - if (special_alt->skip_orig) - insn->type = INSN_NOP; - insn->alt_group = true; last_orig_insn = insn; } @@ -808,6 +803,7 @@ static int add_special_section_alts(struct objtool_file *file) } alt->insn = new_insn; + alt->skip_orig = special_alt->skip_orig; list_add_tail(&alt->list, &orig_insn->alts); list_del(&special_alt->list); @@ -1883,7 +1879,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, insn->visited = true; if (!insn->ignore_alts) { + bool skip_orig = false; + list_for_each_entry(alt, &insn->alts, list) { + if (alt->skip_orig) + skip_orig = true; + ret = validate_branch(file, alt->insn, state); if (ret) { if (backtrace) @@ -1891,6 +1892,9 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, return ret; } } + + if (skip_orig) + return 0; } switch (insn->type) { -- GitLab From 54262aa2830151f89699fa8a6c5aa05f0992e672 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Mar 2019 12:58:15 +0100 Subject: [PATCH 0554/1861] objtool: Fix sibling call detection It turned out that we failed to detect some sibling calls; specifically those without relocation records; like: $ ./objdump-func.sh defconfig-build/mm/kasan/generic.o __asan_loadN 0000 0000000000000840 <__asan_loadN>: 0000 840: 48 8b 0c 24 mov (%rsp),%rcx 0004 844: 31 d2 xor %edx,%edx 0006 846: e9 45 fe ff ff jmpq 690 So extend the cross-function jump to also consider those that are not between known (or newly detected) parent/child functions, as sibling calls when they jump to the start of the function. The second part of that condition is to deal with random jumps to the middle of another function, as can be found in arch/x86/lib/copy_user_64.S for example. 
This then (with later patches applied) makes the above recognise the sibling call: mm/kasan/generic.o: warning: objtool: __asan_loadN()+0x6: call to check_memory_region() with UACCESS enabled Also make sure to set insn->call_dest for sibling calls so we can know who we're calling. This is useful information when printing validation warnings later. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 86 +++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5264a305d6583..8118361295ddd 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -515,7 +515,8 @@ static int add_jump_destinations(struct objtool_file *file) continue; } else { /* sibling call */ - insn->jump_dest = 0; + insn->call_dest = rela->sym; + insn->jump_dest = NULL; continue; } @@ -537,25 +538,38 @@ static int add_jump_destinations(struct objtool_file *file) } /* - * For GCC 8+, create parent/child links for any cold - * subfunctions. This is _mostly_ redundant with a similar - * initialization in read_symbols(). - * - * If a function has aliases, we want the *first* such function - * in the symbol table to be the subfunction's parent. In that - * case we overwrite the initialization done in read_symbols(). - * - * However this code can't completely replace the - * read_symbols() code because this doesn't detect the case - * where the parent function's only reference to a subfunction - * is through a switch table. + * Cross-function jump. */ if (insn->func && insn->jump_dest->func && - insn->func != insn->jump_dest->func && - !strstr(insn->func->name, ".cold.") && - strstr(insn->jump_dest->func->name, ".cold.")) { - insn->func->cfunc = insn->jump_dest->func; - insn->jump_dest->func->pfunc = insn->func; + insn->func != insn->jump_dest->func) { + + /* + * For GCC 8+, create parent/child links for any cold + * subfunctions. This is _mostly_ redundant with a + * similar initialization in read_symbols(). + * + * If a function has aliases, we want the *first* such + * function in the symbol table to be the subfunction's + * parent. In that case we overwrite the + * initialization done in read_symbols(). + * + * However this code can't completely replace the + * read_symbols() code because this doesn't detect the + * case where the parent function's only reference to a + * subfunction is through a switch table. + */ + if (!strstr(insn->func->name, ".cold.") && + strstr(insn->jump_dest->func->name, ".cold.")) { + insn->func->cfunc = insn->jump_dest->func; + insn->jump_dest->func->pfunc = insn->func; + + } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && + insn->jump_dest->offset == insn->jump_dest->func->offset) { + + /* sibling call */ + insn->call_dest = insn->jump_dest->func; + insn->jump_dest = NULL; + } } } @@ -1785,6 +1799,17 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state) return false; } +static int validate_sibling_call(struct instruction *insn, struct insn_state *state) +{ + if (has_modified_stack_frame(state)) { + WARN_FUNC("sibling call from callable instruction with modified stack frame", + insn->sec, insn->offset); + return 1; + } + + return 0; +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). 
Meanwhile, track the frame pointer state at @@ -1935,9 +1960,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: - if (insn->jump_dest && - (!func || !insn->jump_dest->func || - insn->jump_dest->func->pfunc == func)) { + if (func && !insn->jump_dest) { + ret = validate_sibling_call(insn, &state); + if (ret) + return ret; + + } else if (insn->jump_dest && + (!func || !insn->jump_dest->func || + insn->jump_dest->func->pfunc == func)) { ret = validate_branch(file, insn->jump_dest, state); if (ret) { @@ -1945,11 +1975,6 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, BT_FUNC("(branch)", insn); return ret; } - - } else if (func && has_modified_stack_frame(&state)) { - WARN_FUNC("sibling call from callable instruction with modified stack frame", - sec, insn->offset); - return 1; } if (insn->type == INSN_JUMP_UNCONDITIONAL) @@ -1958,11 +1983,10 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_JUMP_DYNAMIC: - if (func && list_empty(&insn->alts) && - has_modified_stack_frame(&state)) { - WARN_FUNC("sibling call from callable instruction with modified stack frame", - sec, insn->offset); - return 1; + if (func && list_empty(&insn->alts)) { + ret = validate_sibling_call(insn, &state); + if (ret) + return ret; } return 0; -- GitLab From ea24213d8088f9da73e1b6aadf7abd2435b70397 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2019 12:50:09 +0100 Subject: [PATCH 0555/1861] objtool: Add UACCESS validation It is important that UACCESS regions are as small as possible; furthermore the UACCESS state is not scheduled, so doing anything that might directly call into the scheduler will cause random code to be run with UACCESS enabled. Teach objtool to track UACCESS state and warn about any CALL made while UACCESS is enabled. This very much includes the __fentry__() and __preempt_schedule() calls. Note that exceptions _do_ save/restore the UACCESS state, and therefore they can drive preemption. This also means that all exception handlers must have an otherwise redundant UACCESS disable instruction; therefore ignore this warning for !STT_FUNC code (exception handlers are not normal functions). 
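The PUSHF/POPF tracking this requires can be illustrated with a small stand-alone program; the single-word bit-stack below mirrors the uaccess_stack scheme added in the diff that follows, but the program itself is only a demonstration, not kernel code:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors objtool's insn_state fields used for AC tracking. */
static bool uaccess;               /* current AC state */
static unsigned int uaccess_stack; /* one saved AC bit per PUSHF */

static void pushf(void)
{
        if (!uaccess_stack)
                uaccess_stack = 1; /* sentinel marking the stack bottom */
        uaccess_stack <<= 1;
        uaccess_stack |= uaccess;  /* save the current AC bit */
}

static void popf(void)
{
        if (uaccess_stack) {
                uaccess = uaccess_stack & 1; /* restore the saved AC bit */
                uaccess_stack >>= 1;
                if (uaccess_stack == 1)      /* only the sentinel left */
                        uaccess_stack = 0;
        }
}

int main(void)
{
        uaccess = true;  /* STAC */
        pushf();         /* exception entry saves flags */
        uaccess = false; /* handler does its redundant CLAC */
        popf();          /* exception return restores flags */
        printf("AC restored: %d\n", uaccess); /* prints 1 */
        return 0;
}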
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- scripts/Makefile.build | 3 + tools/objtool/arch.h | 6 +- tools/objtool/arch/x86/decode.c | 13 ++- tools/objtool/builtin-check.c | 3 +- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 197 +++++++++++++++++++++++++++++--- tools/objtool/check.h | 3 +- tools/objtool/elf.h | 1 + tools/objtool/special.c | 18 +++ tools/objtool/special.h | 1 + 10 files changed, 226 insertions(+), 21 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 76ca30cc47919..0c5969fa795f8 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -222,6 +222,9 @@ endif ifdef CONFIG_RETPOLINE objtool_args += --retpoline endif +ifdef CONFIG_X86_SMAP + objtool_args += --uaccess +endif # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index b0d7dc3d71b5a..467c2fe798a9e 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -33,7 +33,9 @@ #define INSN_STACK 8 #define INSN_BUG 9 #define INSN_NOP 10 -#define INSN_OTHER 11 +#define INSN_STAC 11 +#define INSN_CLAC 12 +#define INSN_OTHER 13 #define INSN_LAST INSN_OTHER enum op_dest_type { @@ -41,6 +43,7 @@ enum op_dest_type { OP_DEST_REG_INDIRECT, OP_DEST_MEM, OP_DEST_PUSH, + OP_DEST_PUSHF, OP_DEST_LEAVE, }; @@ -55,6 +58,7 @@ enum op_src_type { OP_SRC_REG_INDIRECT, OP_SRC_CONST, OP_SRC_POP, + OP_SRC_POPF, OP_SRC_ADD, OP_SRC_AND, }; diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 540a209b78ab3..ab20a96fee50d 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -357,19 +357,26 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, /* pushf */ *type = INSN_STACK; op->src.type = OP_SRC_CONST; - op->dest.type = OP_DEST_PUSH; + op->dest.type = OP_DEST_PUSHF; break; case 0x9d: /* popf */ *type = INSN_STACK; - op->src.type = OP_SRC_POP; + op->src.type = OP_SRC_POPF; op->dest.type = OP_DEST_MEM; break; case 0x0f: - if (op2 >= 0x80 && op2 <= 0x8f) { + if (op2 == 0x01) { + + if (modrm == 0xca) + *type = INSN_CLAC; + else if (modrm == 0xcb) + *type = INSN_STAC; + + } else if (op2 >= 0x80 && op2 <= 0x8f) { *type = INSN_JUMP_CONDITIONAL; diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 99f10c585cbee..f3b378126011f 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -29,7 +29,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable, retpoline, module, backtrace; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess; static const char * const check_usage[] = { "objtool check [] file.o", @@ -42,6 +42,7 @@ const struct option check_options[] = { OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), + OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 65fd3cc3c98b5..69762f9c5602c 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -20,7 +20,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace; 
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8118361295ddd..965e954e07f43 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -442,6 +442,82 @@ static void add_ignores(struct objtool_file *file) } } +/* + * This is a whitelist of functions that is allowed to be called with AC set. + * The list is meant to be minimal and only contains compiler instrumentation + * ABI and a few functions used to implement *_{to,from}_user() functions. + * + * These functions must not directly change AC, but may PUSHF/POPF. + */ +static const char *uaccess_safe_builtin[] = { + /* KASAN */ + "kasan_report", + "check_memory_region", + /* KASAN out-of-line */ + "__asan_loadN_noabort", + "__asan_load1_noabort", + "__asan_load2_noabort", + "__asan_load4_noabort", + "__asan_load8_noabort", + "__asan_load16_noabort", + "__asan_storeN_noabort", + "__asan_store1_noabort", + "__asan_store2_noabort", + "__asan_store4_noabort", + "__asan_store8_noabort", + "__asan_store16_noabort", + /* KASAN in-line */ + "__asan_report_load_n_noabort", + "__asan_report_load1_noabort", + "__asan_report_load2_noabort", + "__asan_report_load4_noabort", + "__asan_report_load8_noabort", + "__asan_report_load16_noabort", + "__asan_report_store_n_noabort", + "__asan_report_store1_noabort", + "__asan_report_store2_noabort", + "__asan_report_store4_noabort", + "__asan_report_store8_noabort", + "__asan_report_store16_noabort", + /* KCOV */ + "write_comp_data", + "__sanitizer_cov_trace_pc", + "__sanitizer_cov_trace_const_cmp1", + "__sanitizer_cov_trace_const_cmp2", + "__sanitizer_cov_trace_const_cmp4", + "__sanitizer_cov_trace_const_cmp8", + "__sanitizer_cov_trace_cmp1", + "__sanitizer_cov_trace_cmp2", + "__sanitizer_cov_trace_cmp4", + "__sanitizer_cov_trace_cmp8", + /* UBSAN */ + "ubsan_type_mismatch_common", + "__ubsan_handle_type_mismatch", + "__ubsan_handle_type_mismatch_v1", + /* misc */ + "csum_partial_copy_generic", + "__memcpy_mcsafe", + "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ + NULL +}; + +static void add_uaccess_safe(struct objtool_file *file) +{ + struct symbol *func; + const char **name; + + if (!uaccess) + return; + + for (name = uaccess_safe_builtin; *name; name++) { + func = find_symbol_by_name(file->elf, *name); + if (!func) + continue; + + func->alias->uaccess_safe = true; + } +} + /* * FIXME: For now, just ignore any alternatives which add retpolines. This is * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. 
@@ -818,6 +894,7 @@ static int add_special_section_alts(struct objtool_file *file) alt->insn = new_insn; alt->skip_orig = special_alt->skip_orig; + orig_insn->ignore_alts |= special_alt->skip_alt; list_add_tail(&alt->list, &orig_insn->alts); list_del(&special_alt->list); @@ -1239,6 +1316,7 @@ static int decode_sections(struct objtool_file *file) return ret; add_ignores(file); + add_uaccess_safe(file); ret = add_ignore_alternatives(file); if (ret) @@ -1320,11 +1398,11 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s return 0; /* push */ - if (op->dest.type == OP_DEST_PUSH) + if (op->dest.type == OP_DEST_PUSH || op->dest.type == OP_DEST_PUSHF) cfa->offset += 8; /* pop */ - if (op->src.type == OP_SRC_POP) + if (op->src.type == OP_SRC_POP || op->src.type == OP_SRC_POPF) cfa->offset -= 8; /* add immediate to sp */ @@ -1581,6 +1659,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) break; case OP_SRC_POP: + case OP_SRC_POPF: if (!state->drap && op->dest.type == OP_DEST_REG && op->dest.reg == cfa->base) { @@ -1645,6 +1724,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) break; case OP_DEST_PUSH: + case OP_DEST_PUSHF: state->stack_size += 8; if (cfa->base == CFI_SP) cfa->offset += 8; @@ -1735,7 +1815,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) break; case OP_DEST_MEM: - if (op->src.type != OP_SRC_POP) { + if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { WARN_FUNC("unknown stack-related memory operation", insn->sec, insn->offset); return -1; @@ -1799,6 +1879,33 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state) return false; } +static inline bool func_uaccess_safe(struct symbol *func) +{ + if (func) + return func->alias->uaccess_safe; + + return false; +} + +static inline const char *insn_dest_name(struct instruction *insn) +{ + if (insn->call_dest) + return insn->call_dest->name; + + return "{dynamic}"; +} + +static int validate_call(struct instruction *insn, struct insn_state *state) +{ + if (state->uaccess && !func_uaccess_safe(insn->call_dest)) { + WARN_FUNC("call to %s() with UACCESS enabled", + insn->sec, insn->offset, insn_dest_name(insn)); + return 1; + } + + return 0; +} + static int validate_sibling_call(struct instruction *insn, struct insn_state *state) { if (has_modified_stack_frame(state)) { @@ -1807,7 +1914,7 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st return 1; } - return 0; + return validate_call(insn, state); } /* @@ -1855,7 +1962,9 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (!insn->hint && !insn_state_match(insn, &state)) return 1; - return 0; + /* If we were here with AC=0, but now have AC=1, go again */ + if (insn->state.uaccess || !state.uaccess) + return 0; } if (insn->hint) { @@ -1925,6 +2034,16 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, switch (insn->type) { case INSN_RETURN: + if (state.uaccess && !func_uaccess_safe(func)) { + WARN_FUNC("return with UACCESS enabled", sec, insn->offset); + return 1; + } + + if (!state.uaccess && func_uaccess_safe(func)) { + WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function", sec, insn->offset); + return 1; + } + if (func && has_modified_stack_frame(&state)) { WARN_FUNC("return with modified stack frame", sec, insn->offset); @@ -1940,17 +2059,22 @@ static int validate_branch(struct objtool_file *file, 
struct instruction *first, return 0; case INSN_CALL: - if (is_fentry_call(insn)) - break; + case INSN_CALL_DYNAMIC: + ret = validate_call(insn, &state); + if (ret) + return ret; - ret = dead_end_function(file, insn->call_dest); - if (ret == 1) - return 0; - if (ret == -1) - return 1; + if (insn->type == INSN_CALL) { + if (is_fentry_call(insn)) + break; + + ret = dead_end_function(file, insn->call_dest); + if (ret == 1) + return 0; + if (ret == -1) + return 1; + } - /* fallthrough */ - case INSN_CALL_DYNAMIC: if (!no_fp && func && !has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer save/setup", sec, insn->offset); @@ -2003,6 +2127,49 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (update_insn_state(insn, &state)) return 1; + if (insn->stack_op.dest.type == OP_DEST_PUSHF) { + if (!state.uaccess_stack) { + state.uaccess_stack = 1; + } else if (state.uaccess_stack >> 31) { + WARN_FUNC("PUSHF stack exhausted", sec, insn->offset); + return 1; + } + state.uaccess_stack <<= 1; + state.uaccess_stack |= state.uaccess; + } + + if (insn->stack_op.src.type == OP_SRC_POPF) { + if (state.uaccess_stack) { + state.uaccess = state.uaccess_stack & 1; + state.uaccess_stack >>= 1; + if (state.uaccess_stack == 1) + state.uaccess_stack = 0; + } + } + + break; + + case INSN_STAC: + if (state.uaccess) { + WARN_FUNC("recursive UACCESS enable", sec, insn->offset); + return 1; + } + + state.uaccess = true; + break; + + case INSN_CLAC: + if (!state.uaccess && insn->func) { + WARN_FUNC("redundant UACCESS disable", sec, insn->offset); + return 1; + } + + if (func_uaccess_safe(func) && !state.uaccess_stack) { + WARN_FUNC("UACCESS-safe disables UACCESS", sec, insn->offset); + return 1; + } + + state.uaccess = false; break; default: @@ -2168,6 +2335,8 @@ static int validate_functions(struct objtool_file *file) if (!insn || insn->ignore) continue; + state.uaccess = func->alias->uaccess_safe; + ret = validate_branch(file, insn, state); if (ret && backtrace) BT_FUNC("<=== (func)", insn); diff --git a/tools/objtool/check.h b/tools/objtool/check.h index d8896eb435212..78a95d06c165e 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -31,7 +31,8 @@ struct insn_state { int stack_size; unsigned char type; bool bp_scratch; - bool drap, end; + bool drap, end, uaccess; + unsigned int uaccess_stack; int drap_reg, drap_offset; struct cfi_reg vals[CFI_NUM_REGS]; }; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 968265b4b4cd8..2cc2ed49322d1 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -62,6 +62,7 @@ struct symbol { unsigned long offset; unsigned int len; struct symbol *pfunc, *cfunc, *alias; + bool uaccess_safe; }; struct rela { diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 50af4e1274b39..4e50563d87c64 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -23,6 +23,7 @@ #include #include +#include "builtin.h" #include "special.h" #include "warn.h" @@ -42,6 +43,7 @@ #define ALT_NEW_LEN_OFFSET 11 #define X86_FEATURE_POPCNT (4*32+23) +#define X86_FEATURE_SMAP (9*32+20) struct special_entry { const char *sec; @@ -110,6 +112,22 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, */ if (feature == X86_FEATURE_POPCNT) alt->skip_orig = true; + + /* + * If UACCESS validation is enabled; force that alternative; + * otherwise force it the other way. 
+ * + * What we want to avoid is having both the original and the + * alternative code flow at the same time, in that case we can + * find paths that see the STAC but take the NOP instead of + * CLAC and the other way around. + */ + if (feature == X86_FEATURE_SMAP) { + if (uaccess) + alt->skip_orig = true; + else + alt->skip_alt = true; + } } orig_rela = find_rela_by_dest(sec, offset + entry->orig); diff --git a/tools/objtool/special.h b/tools/objtool/special.h index fad1d092f679e..d5c062e718eff 100644 --- a/tools/objtool/special.h +++ b/tools/objtool/special.h @@ -26,6 +26,7 @@ struct special_alt { bool group; bool skip_orig; + bool skip_alt; bool jump_or_nop; struct section *orig_sec; -- GitLab From 2f0f9e9ad7b3459c5c54ef2c03145a98e65dd158 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2019 11:10:55 +0100 Subject: [PATCH 0556/1861] objtool: Add Direction Flag validation Having DF escape is BAD(tm). Linus; you suggested this one, but since DF really is only used from ASM and the failure case is fairly obvious, do we really need this? OTOH the patch is fairly small and simple, so let's just do this to demonstrate objtool's superior awesomeness. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- tools/objtool/arch.h | 4 +++- tools/objtool/arch/x86/decode.c | 8 ++++++++ tools/objtool/check.c | 25 +++++++++++++++++++++++++ tools/objtool/check.h | 2 +- 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 467c2fe798a9e..7a111a77b7aa4 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -35,7 +35,9 @@ #define INSN_NOP 10 #define INSN_STAC 11 #define INSN_CLAC 12 -#define INSN_OTHER 13 +#define INSN_STD 13 +#define INSN_CLD 14 +#define INSN_OTHER 15 #define INSN_LAST INSN_OTHER enum op_dest_type { diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index ab20a96fee50d..472e991f6512d 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -451,6 +451,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_CALL; break; + case 0xfc: + *type = INSN_CLD; + break; + + case 0xfd: + *type = INSN_STD; + break; + case 0xff: if (modrm_reg == 2 || modrm_reg == 3) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 965e954e07f43..38b0517dc49ef 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1903,6 +1903,12 @@ static int validate_call(struct instruction *insn, struct insn_state *state) return 1; } + if (state->df) { + WARN_FUNC("call to %s() with DF set", + insn->sec, insn->offset, insn_dest_name(insn)); + return 1; + } + return 0; } @@ -2044,6 +2050,11 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, return 1; } + if (state.df) { + WARN_FUNC("return with DF set", sec, insn->offset); + return 1; + } + if (func && has_modified_stack_frame(&state)) { WARN_FUNC("return with modified stack frame", sec, insn->offset); @@ -2172,6 +2183,20 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, state.uaccess = false; break; + case INSN_STD: + if (state.df) + WARN_FUNC("recursive STD", sec, insn->offset); + + state.df = true; + break; + + case INSN_CLD: + if (!state.df && insn->func) + WARN_FUNC("redundant CLD", sec, insn->offset); + + state.df = false; + break; + default: break; } diff --git a/tools/objtool/check.h 
b/tools/objtool/check.h index 78a95d06c165e..71e54f97dbcdc 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -31,7 +31,7 @@ struct insn_state { int stack_size; unsigned char type; bool bp_scratch; - bool drap, end, uaccess; + bool drap, end, uaccess, df; unsigned int uaccess_stack; int drap_reg, drap_offset; struct cfi_reg vals[CFI_NUM_REGS]; -- GitLab From 64604d54d3115fee89598bfb6d8d2252f8a2d114 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Mar 2019 11:35:46 +0100 Subject: [PATCH 0557/1861] sched/x86_64: Don't save flags on context switch Now that we have objtool validating AC=1 state for all x86_64 code, we can once again guarantee clean flags on schedule. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 -- arch/x86/include/asm/switch_to.h | 2 +- arch/x86/kernel/process_64.c | 7 ------- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4fe27b67d7e24..1f0efdb7b6294 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -291,7 +291,6 @@ ENTRY(__switch_to_asm) pushq %r13 pushq %r14 pushq %r15 - pushfq /* switch stack */ movq %rsp, TASK_threadsp(%rdi) @@ -314,7 +313,6 @@ ENTRY(__switch_to_asm) #endif /* restore callee-saved registers */ - popfq popq %r15 popq %r14 popq %r13 diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 157149d4129c0..18a4b6890fa82 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -40,13 +40,13 @@ asmlinkage void ret_from_fork(void); * order of the fields must match the code in __switch_to_asm(). */ struct inactive_task_frame { - unsigned long flags; #ifdef CONFIG_X86_64 unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; #else + unsigned long flags; unsigned long si; unsigned long di; #endif diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 026a43be9bd1f..844a28b29967d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -393,13 +393,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; - /* - * For a new task use the RESET flags value since there is no before. - * All the status flags are zero; DF and all the system flags must also - * be 0, specifically IF must be 0 because we context switch to the new - * task with interrupts disabled. - */ - frame->flags = X86_EFLAGS_FIXED; frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; -- GitLab From 1279e41d535e28cc3b56fa4a09e71a709641cae6 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 3 Apr 2019 14:54:24 +0800 Subject: [PATCH 0558/1861] perf/headers: Fix stale comment for struct perf_addr_filter The @inode field has been removed after: 9511bce9fe8e ("perf/core: Fix bad use of igrab()") Update the description. 
Signed-off-by: Shaokun Zhang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/1554274464-5739-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ef764f613e..085a95e2582ad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -464,7 +464,7 @@ enum perf_addr_filter_action_t { /** * struct perf_addr_filter - address range filter definition * @entry: event's filter list linkage - * @inode: object file's inode for file-based filters + * @path: object file's path for file-based filters * @offset: filter range offset * @size: filter range size (size==0 means single address trigger) * @action: filter/start/stop -- GitLab From d7262457e35dbe239659e62654e56f8ddb814bed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Mar 2019 13:38:49 +0100 Subject: [PATCH 0559/1861] perf/x86/intel: Initialize TFA MSR Stephane reported that the TFA MSR is not initialized by the kernel, but the TFA bit could be set by firmware or as a leftover from a kexec, which makes the state inconsistent. Reported-by: Stephane Eranian Tested-by: Nelson DSouza Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: tonyj@suse.com Link: https://lkml.kernel.org/r/20190321123849.GN6521@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1539647ea39d1..f61dcbef20ffe 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3575,6 +3575,12 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = NULL; + if (x86_pmu.flags & PMU_FL_TFA) { + WARN_ON_ONCE(cpuc->tfa_shadow); + cpuc->tfa_shadow = ~0ULL; + intel_set_tfa(cpuc, false); + } + if (x86_pmu.version > 1) flip_smm_bit(&x86_pmu.attr_freeze_on_smi); -- GitLab From 914123fa39042e651d79eaf86bbf63a1b938dddf Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 2 Apr 2019 15:21:14 +0000 Subject: [PATCH 0560/1861] x86/perf/amd: Resolve race condition when disabling PMC On AMD processors, the detection of an overflowed counter in the NMI handler relies on the current value of the counter. So, for example, to check for overflow on a 48-bit counter, bit 47 is checked to see if it is 1 (not overflowed) or 0 (overflowed). There is currently a race condition present when disabling and then updating the PMC. Increased NMI latency in newer AMD processors makes this race condition more pronounced. If the counter value has overflowed, it is possible to update the PMC value before the NMI handler can run. The updated PMC value is not an overflowed value, so when the perf NMI handler does run, it will not find an overflowed counter. This may appear as an unknown NMI resulting in either a panic or a series of messages, depending on how the kernel is configured. To eliminate this race condition, the PMC value must be checked after disabling the counter. 
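The overflow test this relies on is easy to demonstrate in isolation; the stand-alone program below assumes the 48-bit counter width described above (x86_pmu.cntval_bits in the real driver):

#include <stdint.h>
#include <stdio.h>

#define CNTVAL_BITS 48 /* width of an AMD PMC */

/* Bit 47 set means "not overflowed"; bit 47 clear means "overflowed". */
static int overflowed(uint64_t counter)
{
        return !(counter & (1ULL << (CNTVAL_BITS - 1)));
}

int main(void)
{
        printf("%d\n", overflowed(0x800000000000ULL)); /* 0: counter armed */
        printf("%d\n", overflowed(0x000000000001ULL)); /* 1: counter wrapped */
        return 0;
}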
Add an AMD function, amd_pmu_disable_all(), that will wait for the NMI handler to reset any active and overflowed counter after calling x86_pmu_disable_all(). Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: # 4.14.x- Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 65 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 7d2d7c801dba6..c09ee88b0eed5 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "../perf_event.h" @@ -429,6 +430,64 @@ static void amd_pmu_cpu_dead(int cpu) } } +/* + * When a PMC counter overflows, an NMI is used to process the event and + * reset the counter. NMI latency can result in the counter being updated + * before the NMI can run, which can result in what appear to be spurious + * NMIs. This function is intended to wait for the NMI to run and reset + * the counter to avoid possible unhandled NMI messages. + */ +#define OVERFLOW_WAIT_COUNT 50 + +static void amd_pmu_wait_on_overflow(int idx) +{ + unsigned int i; + u64 counter; + + /* + * Wait for the counter to be reset if it has overflowed. This loop + * should exit very, very quickly, but just in case, don't wait + * forever... + */ + for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { + rdmsrl(x86_pmu_event_addr(idx), counter); + if (counter & (1ULL << (x86_pmu.cntval_bits - 1))) + break; + + /* Might be in IRQ context, so can't sleep */ + udelay(1); + } +} + +static void amd_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int idx; + + x86_pmu_disable_all(); + + /* + * This shouldn't be called from NMI context, but add a safeguard here + * to return, since if we're in NMI context we can't wait for an NMI + * to reset an overflowed counter value. + */ + if (in_nmi()) + return; + + /* + * Check each counter for overflow and wait for it to be reset by the + * NMI if it has overflowed. This relies on the fact that all active + * counters are always enabled when this function is called and + * ARCH_PERFMON_EVENTSEL_INT is always set. 
+ */ + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + amd_pmu_wait_on_overflow(idx); + } +} + static struct event_constraint * amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -622,7 +681,7 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config) static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, + .disable_all = amd_pmu_disable_all, .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, @@ -732,7 +791,7 @@ void amd_pmu_enable_virt(void) cpuc->perf_ctr_virt_mask = 0; /* Reload all events */ - x86_pmu_disable_all(); + amd_pmu_disable_all(); x86_pmu_enable_all(0); } EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); @@ -750,7 +809,7 @@ void amd_pmu_disable_virt(void) cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; /* Reload all events */ - x86_pmu_disable_all(); + amd_pmu_disable_all(); x86_pmu_enable_all(0); } EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); -- GitLab From 6d3edaae16c6c7d238360f2841212c2b26774d5e Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 2 Apr 2019 15:21:16 +0000 Subject: [PATCH 0561/1861] x86/perf/amd: Resolve NMI latency issues for active PMCs On AMD processors, the detection of an overflowed PMC counter in the NMI handler relies on the current value of the PMC. So, for example, to check for overflow on a 48-bit counter, bit 47 is checked to see if it is 1 (not overflowed) or 0 (overflowed). When the perf NMI handler executes it does not know in advance which PMC counters have overflowed. As such, the NMI handler will process all active PMC counters that have overflowed. NMI latency in newer AMD processors can result in multiple overflowed PMC counters being processed in one NMI and then a subsequent NMI, that does not appear to be a back-to-back NMI, not finding any PMC counters that have overflowed. This may appear to be an unhandled NMI resulting in either a panic or a series of messages, depending on how the kernel was configured. To mitigate this issue, add an AMD handle_irq callback function, amd_pmu_handle_irq(), that will invoke the common x86_pmu_handle_irq() function and upon return perform some additional processing that will indicate if the NMI has been handled or would have been handled had an earlier NMI not handled the overflowed PMC. Using a per-CPU variable, a minimum value of the number of active PMCs or 2 will be set whenever a PMC is active. This is used to indicate the possible number of NMIs that can still occur. The value of 2 is used for when an NMI does not arrive at the LAPIC in time to be collapsed into an already pending NMI. Each time the function is called without having handled an overflowed counter, the per-CPU value is checked. If the value is non-zero, it is decremented and the NMI indicates that it handled the NMI. If the value is zero, then the NMI indicates that it did not handle the NMI. 
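Condensed, the bookkeeping described above reduces to the sketch below; the full version, with comments and the active-count subtlety, is in the diff that follows:

active  = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
handled = x86_pmu_handle_irq(regs);

if (handled) {
        /* up to min(active, 2) "empty" NMIs may legitimately follow */
        this_cpu_write(perf_nmi_counter, min_t(unsigned int, 2, active));
        return handled;
}

if (!this_cpu_read(perf_nmi_counter))
        return NMI_DONE;    /* genuinely unknown NMI */

this_cpu_dec(perf_nmi_counter);
return NMI_HANDLED;         /* absorb a late PMC NMI */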
Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: # 4.14.x- Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 56 +++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index c09ee88b0eed5..34c191453ce33 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -4,10 +4,13 @@ #include #include #include +#include #include #include "../perf_event.h" +static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -488,6 +491,57 @@ static void amd_pmu_disable_all(void) } } +/* + * Because of NMI latency, if multiple PMC counters are active or other sources + * of NMIs are received, the perf NMI handler can handle one or more overflowed + * PMC counters outside of the NMI associated with the PMC overflow. If the NMI + * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel + * back-to-back NMI support won't be active. This PMC handler needs to take into + * account that this can occur, otherwise this could result in unknown NMI + * messages being issued. Examples of this are PMC overflow while in the NMI + * handler when multiple PMCs are active or PMC overflow while handling some + * other source of an NMI. + * + * Attempt to mitigate this by using the number of active PMCs to determine + * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset + * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the + * number of active PMCs or 2. The value of 2 is used in case an NMI does not + * arrive at the LAPIC in time to be collapsed into an already pending NMI. + */ +static int amd_pmu_handle_irq(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int active, handled; + + /* + * Obtain the active count before calling x86_pmu_handle_irq() since + * it is possible that x86_pmu_handle_irq() may make a counter + * inactive (through x86_pmu_stop). + */ + active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX); + + /* Process any counter overflows */ + handled = x86_pmu_handle_irq(regs); + + /* + * If a counter was handled, record the number of possible remaining + * NMIs that can occur. 
+ */ + if (handled) { + this_cpu_write(perf_nmi_counter, + min_t(unsigned int, 2, active)); + + return handled; + } + + if (!this_cpu_read(perf_nmi_counter)) + return NMI_DONE; + + this_cpu_dec(perf_nmi_counter); + + return NMI_HANDLED; +} + static struct event_constraint * amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -680,7 +734,7 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config) static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", - .handle_irq = x86_pmu_handle_irq, + .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, -- GitLab From a1247d06d01045d7ab2882a9c074fbf21137c690 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Mar 2019 13:18:56 +0100 Subject: [PATCH 0562/1861] locking/static_key: Fix false positive warnings on concurrent dec/inc Even though the atomic_dec_and_mutex_lock() in __static_key_slow_dec_cpuslocked() can never see a negative value in key->enabled the subsequent sanity check is re-reading key->enabled, which may have been set to -1 in the meantime by static_key_slow_inc_cpuslocked(). CPU A CPU B __static_key_slow_dec_cpuslocked(): static_key_slow_inc_cpuslocked(): # enabled = 1 atomic_dec_and_mutex_lock() # enabled = 0 atomic_read() == 0 atomic_set(-1) # enabled = -1 val = atomic_read() # Oops - val == -1! The test case is TCP's clean_acked_data_enable() / clean_acked_data_disable() as tickled by KTLS (net/ktls). Suggested-by: Jakub Kicinski Reported-by: Jakub Kicinski Tested-by: Jakub Kicinski Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: ard.biesheuvel@linaro.org Cc: oss-drivers@netronome.com Cc: pbonzini@redhat.com Signed-off-by: Ingo Molnar --- kernel/jump_label.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index bad96b476eb6e..a799b1ac6b2fe 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -206,6 +206,8 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { + int val; + lockdep_assert_cpus_held(); /* @@ -215,17 +217,20 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key, * returns is unbalanced, because all other static_key_slow_inc() * instances block while the update is in progress. */ - if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { - WARN(atomic_read(&key->enabled) < 0, - "jump label: negative count!\n"); + val = atomic_fetch_add_unless(&key->enabled, -1, 1); + if (val != 1) { + WARN(val < 0, "jump label: negative count!\n"); return; } - if (rate_limit) { - atomic_inc(&key->enabled); - schedule_delayed_work(work, rate_limit); - } else { - jump_label_update(key); + jump_label_lock(); + if (atomic_dec_and_test(&key->enabled)) { + if (rate_limit) { + atomic_inc(&key->enabled); + schedule_delayed_work(work, rate_limit); + } else { + jump_label_update(key); + } } jump_label_unlock(); } -- GitLab From a0fe2c6479aab5723239b315ef1b552673f434a3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Mar 2019 22:46:49 +0100 Subject: [PATCH 0563/1861] linux/kernel.h: Use parentheses around argument in u64_to_user_ptr() Use parentheses around uses of the argument in u64_to_user_ptr() to ensure that the cast doesn't apply to part of the argument. 
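The hazard can be reproduced in a stand-alone program; the macros below are simplified stand-ins for the kernel's u64_to_user_ptr() (no __user annotation here), not the kernel definition itself:

#include <stdint.h>
#include <stdio.h>

#define TO_PTR_OLD(x) ((void *)(uintptr_t)x)   /* unparenthesized argument */
#define TO_PTR_NEW(x) ((void *)(uintptr_t)(x))

int main(void)
{
        uint64_t a = 0x1000, b = 0x2000;
        int flag = 0;

        void *ok = TO_PTR_NEW(flag ? a : b); /* (void *)0x2000 */

        /*
         * TO_PTR_OLD(flag ? a : b) expands to
         *     ((void *)(uintptr_t)flag ? a : b)
         * so the casts grab only `flag`, which becomes the ternary
         * condition, and the result is a plain uint64_t that is never
         * converted to a pointer; uncommenting the next line draws an
         * int-conversion diagnostic:
         */
        /* void *bad = TO_PTR_OLD(flag ? a : b); */

        printf("%p\n", ok);
        return 0;
}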
There are existing uses of the macro of the form u64_to_user_ptr(A + B) which expands to (void __user *)(uintptr_t)A + B (the cast applies to the first operand of the addition; the addition is a pointer addition). This happens to still work as intended; the semantic difference doesn't cause a difference in behavior. But I want to use u64_to_user_ptr() with a ternary operator in the argument, like so: u64_to_user_ptr(A ? B : C) This currently doesn't work as intended. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Mukesh Ojha Cc: Andrei Vagin Cc: Andrew Morton Cc: Dan Carpenter Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jani Nikula Cc: Kees Cook Cc: Masahiro Yamada Cc: NeilBrown Cc: Peter Zijlstra Cc: Qiaowei Ren Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190329214652.258477-1-jannh@google.com --- include/linux/kernel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 34a5036debd34..2d14e21c16c0b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -47,8 +47,8 @@ #define u64_to_user_ptr(x) ( \ { \ - typecheck(u64, x); \ - (void __user *)(uintptr_t)x; \ + typecheck(u64, (x)); \ + (void __user *)(uintptr_t)(x); \ } \ ) -- GitLab From 8983eb602af511fc5822f5ff4a82074c68816fd9 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 3 Apr 2019 15:31:49 +0800 Subject: [PATCH 0564/1861] ALSA: hda/realtek - Move to ACT_INIT state Headset Mic jack-detect state was lost when Chrome OS booted with a headset already plugged in, because the combo-jack JD reset verbs were only issued at the ACT_PRE_PROBE stage; Intel's test results failed with that arrangement as well. After moving the initialization to the ACT_INIT state the tests pass and Mic jack detection is reported every time. This patch also changes the model name to 'alc-chrome-book' for use by Chrome OS applications. 
Fixes: 10f5b1b85ed1 ("ALSA: hda/realtek - Fixed Headset Mic JD not stable") Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 41 +++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 715ab2342151e..c235b26add071 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5491,7 +5491,7 @@ static void alc_headset_btn_callback(struct hda_codec *codec, jack->jack->button_state = report; } -static void alc295_fixup_chromebook(struct hda_codec *codec, +static void alc_fixup_headset_jack(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5501,16 +5501,6 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, alc_headset_btn_callback); snd_hda_jack_add_kctl(codec, 0x55, "Headset Jack", false, SND_JACK_HEADSET, alc_headset_btn_keymap); - switch (codec->core.vendor_id) { - case 0x10ec0295: - alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); - break; - case 0x10ec0236: - alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ - alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); - break; - } break; case HDA_FIXUP_ACT_INIT: switch (codec->core.vendor_id) { @@ -5531,6 +5521,25 @@ static void alc295_fixup_chromebook(struct hda_codec *codec, } } +static void alc295_fixup_chromebook(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + switch (action) { + case HDA_FIXUP_ACT_INIT: + switch (codec->core.vendor_id) { + case 0x10ec0295: + alc_update_coef_idx(codec, 0x4a, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x4a, 0x8000, 0 << 15); + break; + case 0x10ec0236: + alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ + alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); + break; + } + break; + } +} + static void alc_fixup_disable_mic_vref(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -5685,6 +5694,7 @@ enum { ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE, ALC255_FIXUP_ACER_HEADSET_MIC, ALC295_FIXUP_CHROME_BOOK, + ALC225_FIXUP_HEADSET_JACK, ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE, ALC225_FIXUP_WYSE_AUTO_MUTE, ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, @@ -6646,6 +6656,12 @@ static const struct hda_fixup alc269_fixups[] = { [ALC295_FIXUP_CHROME_BOOK] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_chromebook, + .chained = true, + .chain_id = ALC225_FIXUP_HEADSET_JACK + }, + [ALC225_FIXUP_HEADSET_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_jack, }, [ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, @@ -7144,7 +7160,8 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_DUMMY_LINEOUT_VERB, .name = "alc255-dummy-lineout"}, {.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"}, {.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"}, - {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"}, + {.id = ALC225_FIXUP_HEADSET_JACK, .name = "alc-headset-jack"}, + {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-chrome-book"}, {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {} }; -- GitLab From 80690a276f444a68a332136d98bfea1c338bc263 Mon Sep 17 00:00:00 2001 From: Richard Sailer Date: Tue, 2 Apr 2019 15:52:04 +0200 Subject: [PATCH 0565/1861] ALSA: hda/realtek - Add quirk for Tuxedo XC 1509 This adds a SND_PCI_QUIRK(...) line for the Tuxedo XC 1509. 
The Tuxedo XC 1509 and the System76 oryp5 are the same barebone notebooks manufactured by Clevo. To name the fixups both machines use after the actual underlying hardware, this patch also changes System76_oryp5 to clevo_pb51ed in 2 enum symbols and one function name, matching the other pci_quirk entries which are also named after the device ODM. Fixes: 7f665b1c3283 ("ALSA: hda/realtek - Headset microphone and internal speaker support for System76 oryp5") Signed-off-by: Richard Sailer Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c235b26add071..8104797660903 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1864,8 +1864,8 @@ enum { ALC887_FIXUP_BASS_CHMAP, ALC1220_FIXUP_GB_DUAL_CODECS, ALC1220_FIXUP_CLEVO_P950, - ALC1220_FIXUP_SYSTEM76_ORYP5, - ALC1220_FIXUP_SYSTEM76_ORYP5_PINS, + ALC1220_FIXUP_CLEVO_PB51ED, + ALC1220_FIXUP_CLEVO_PB51ED_PINS, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2070,7 +2070,7 @@ static void alc1220_fixup_clevo_p950(struct hda_codec *codec, static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, const struct hda_fixup *fix, int action); -static void alc1220_fixup_system76_oryp5(struct hda_codec *codec, +static void alc1220_fixup_clevo_pb51ed(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -2322,18 +2322,18 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_clevo_p950, }, - [ALC1220_FIXUP_SYSTEM76_ORYP5] = { + [ALC1220_FIXUP_CLEVO_PB51ED] = { .type = HDA_FIXUP_FUNC, - .v.func = alc1220_fixup_system76_oryp5, + .v.func = alc1220_fixup_clevo_pb51ed, }, - [ALC1220_FIXUP_SYSTEM76_ORYP5_PINS] = { + [ALC1220_FIXUP_CLEVO_PB51ED_PINS] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ {} }, .chained = true, - .chain_id = ALC1220_FIXUP_SYSTEM76_ORYP5, + .chain_id = ALC1220_FIXUP_CLEVO_PB51ED, }, }; @@ -2411,8 +2411,9 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS), - SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_SYSTEM76_ORYP5_PINS), + SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), -- GitLab From b10abd0a8859493a93c6b8020f2be2587557749d Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 20 Mar 2019 20:34:23 -0400 Subject: [PATCH 0566/1861] sched/cpufreq: Annotate cpufreq_update_util_data pointer with __rcu Recently I added an RCU annotation check to rcu_assign_pointer(). 
All pointers assigned to RCU protected data are to be annotated with __rcu in order to be able to use rcu_assign_pointer() similar to checks in other RCU APIs. This resulted in a sparse error: kernel//sched/cpufreq.c:41:9: sparse: error: incompatible types in comparison expression (different address spaces) Fix this by annotating the cpufreq_update_util_data pointer with __rcu. This will also help sparse catch any future RCU misuse bugs. Signed-off-by: Joel Fernandes (Google) [ From an RCU perspective. ] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paul E. McKenney Cc: Josh Triplett Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Luc Van Oostenryck Cc: Mathieu Desnoyers Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: keescook@chromium.org Cc: kernel-hardening@lists.openwall.com Cc: kernel-team@android.com Link: https://lkml.kernel.org/r/20190321003426.160260-2-joel@joelfernandes.org Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq.c | 2 +- kernel/sched/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c index 835671f0f9170..b5dcd1d83c7fa 100644 --- a/kernel/sched/cpufreq.c +++ b/kernel/sched/cpufreq.c @@ -7,7 +7,7 @@ */ #include "sched.h" -DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); +DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); /** * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index efa686eeff269..713715dd00cfd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2185,7 +2185,7 @@ static inline u64 irq_time_read(int cpu) #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ #ifdef CONFIG_CPU_FREQ -DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); +DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); /** * cpufreq_update_util - Take a note about CPU utilization changes. -- GitLab From 994aeb7a93e43d28f6074195ccb03a384342e1bf Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 20 Mar 2019 20:34:24 -0400 Subject: [PATCH 0567/1861] sched_domain: Annotate RCU pointers properly The scheduler uses the RCU API in various places to access sched_domain pointers. These cause sparse errors as below. Many new errors show up because of an annotation check I added to rcu_assign_pointer(). Let us annotate the pointers correctly, which will also help sparse catch any potential future bugs. 
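For reference, the convention being enforced looks like this in a generic sketch (not code from the patch):

#include <linux/rcupdate.h>

struct cfg { int val; };

/*
 * Writers publish with rcu_assign_pointer(), readers use
 * rcu_dereference(); sparse can only check both sides if the pointer
 * carries the __rcu address-space annotation.
 */
static struct cfg __rcu *active_cfg;

static void publish(struct cfg *new)
{
        rcu_assign_pointer(active_cfg, new);
}

static int read_val(void)
{
        struct cfg *c;
        int v = 0;

        rcu_read_lock();
        c = rcu_dereference(active_cfg);
        if (c)
                v = c->val;
        rcu_read_unlock();
        return v;
}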
This fixes the following sparse errors: rt.c:1681:9: error: incompatible types in comparison expression deadline.c:1904:9: error: incompatible types in comparison expression core.c:519:9: error: incompatible types in comparison expression core.c:1634:17: error: incompatible types in comparison expression fair.c:6193:14: error: incompatible types in comparison expression fair.c:9883:22: error: incompatible types in comparison expression fair.c:9897:9: error: incompatible types in comparison expression sched.h:1287:9: error: incompatible types in comparison expression topology.c:612:9: error: incompatible types in comparison expression topology.c:615:9: error: incompatible types in comparison expression sched.h:1300:9: error: incompatible types in comparison expression topology.c:618:9: error: incompatible types in comparison expression sched.h:1287:9: error: incompatible types in comparison expression topology.c:621:9: error: incompatible types in comparison expression sched.h:1300:9: error: incompatible types in comparison expression topology.c:624:9: error: incompatible types in comparison expression topology.c:671:9: error: incompatible types in comparison expression stats.c:45:17: error: incompatible types in comparison expression fair.c:5998:15: error: incompatible types in comparison expression fair.c:5989:15: error: incompatible types in comparison expression fair.c:5998:15: error: incompatible types in comparison expression fair.c:5989:15: error: incompatible types in comparison expression fair.c:6120:19: error: incompatible types in comparison expression fair.c:6506:14: error: incompatible types in comparison expression fair.c:6515:14: error: incompatible types in comparison expression fair.c:6623:9: error: incompatible types in comparison expression fair.c:5970:17: error: incompatible types in comparison expression fair.c:8642:21: error: incompatible types in comparison expression fair.c:9253:9: error: incompatible types in comparison expression fair.c:9331:9: error: incompatible types in comparison expression fair.c:9519:15: error: incompatible types in comparison expression fair.c:9533:14: error: incompatible types in comparison expression fair.c:9542:14: error: incompatible types in comparison expression fair.c:9567:14: error: incompatible types in comparison expression fair.c:9597:14: error: incompatible types in comparison expression fair.c:9421:16: error: incompatible types in comparison expression fair.c:9421:16: error: incompatible types in comparison expression Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) [ From an RCU perspective. ] Reviewed-by: Paul E. 
McKenney Cc: Josh Triplett Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Luc Van Oostenryck Cc: Mathieu Desnoyers Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: keescook@chromium.org Cc: kernel-hardening@lists.openwall.com Cc: kernel-team@android.com Link: https://lkml.kernel.org/r/20190321003426.160260-3-joel@joelfernandes.org Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 4 ++-- kernel/sched/sched.h | 14 +++++++------- kernel/sched/topology.c | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 57c7ed3fe4659..cfc0a89a71598 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -76,8 +76,8 @@ struct sched_domain_shared { struct sched_domain { /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ + struct sched_domain __rcu *parent; /* top domain must be null terminated */ + struct sched_domain __rcu *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 713715dd00cfd..2b452d68ab2ed 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -869,8 +869,8 @@ struct rq { atomic_t nr_iowait; #ifdef CONFIG_SMP - struct root_domain *rd; - struct sched_domain *sd; + struct root_domain *rd; + struct sched_domain __rcu *sd; unsigned long cpu_capacity; unsigned long cpu_capacity_orig; @@ -1324,13 +1324,13 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) return sd; } -DECLARE_PER_CPU(struct sched_domain *, sd_llc); +DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc); DECLARE_PER_CPU(int, sd_llc_size); DECLARE_PER_CPU(int, sd_llc_id); -DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -DECLARE_PER_CPU(struct sched_domain *, sd_numa); -DECLARE_PER_CPU(struct sched_domain *, sd_asym_packing); -DECLARE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); +DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); +DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa); +DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); +DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); extern struct static_key_false sched_asym_cpucapacity; struct sched_group_capacity { diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index ab7f371a3a179..64bec54ded3e0 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -615,13 +615,13 @@ static void destroy_sched_domains(struct sched_domain *sd) * the cpumask of the domain), this allows us to quickly tell if * two CPUs are in the same cache domain, see cpus_share_cache(). 
*/ -DEFINE_PER_CPU(struct sched_domain *, sd_llc); +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc); DEFINE_PER_CPU(int, sd_llc_size); DEFINE_PER_CPU(int, sd_llc_id); -DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); -DEFINE_PER_CPU(struct sched_domain *, sd_numa); -DEFINE_PER_CPU(struct sched_domain *, sd_asym_packing); -DEFINE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); +DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared); +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa); +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity); static void update_top_cache_domain(int cpu) -- GitLab From 03f4b48edae7d936c5bf23488c1ce3fbe7fc2733 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 20 Mar 2019 20:34:25 -0400 Subject: [PATCH 0568/1861] rcuwait: Annotate task_struct with __rcu This suppresses sparse error generated due to the recently added rcu_assign_pointer sparse check. percpu-rwsem.c:162:9: sparse: error: incompatible types in comparison expression exit.c:316:16: sparse: error: incompatible types in comparison expression Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) [ From an RCU perspective. ] Reviewed-by: Paul E. McKenney Cc: Josh Triplett Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Luc Van Oostenryck Cc: Mathieu Desnoyers Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: keescook@chromium.org Cc: kernel-hardening@lists.openwall.com Cc: kernel-team@android.com Link: https://lkml.kernel.org/r/20190321003426.160260-4-joel@joelfernandes.org Signed-off-by: Ingo Molnar --- include/linux/rcuwait.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 90bfa3279a01c..563290fc194f2 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -18,7 +18,7 @@ * awoken. */ struct rcuwait { - struct task_struct *task; + struct task_struct __rcu *task; }; #define __RCUWAIT_INITIALIZER(name) \ -- GitLab From 7ba7319f9e3898101bff5d63cbae5a6cc174c8c9 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 20 Mar 2019 20:34:26 -0400 Subject: [PATCH 0569/1861] sched/core: Annotate perf_domain pointer with __rcu This fixes the following sparse errors in sched/fair.c: fair.c:6506:14: error: incompatible types in comparison expression fair.c:8642:21: error: incompatible types in comparison expression Using __rcu will also help sparse catch any future bugs. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) [ From an RCU perspective. ] Reviewed-by: Paul E. McKenney Cc: Josh Triplett Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Luc Van Oostenryck Cc: Mathieu Desnoyers Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: keescook@chromium.org Cc: kernel-hardening@lists.openwall.com Cc: kernel-team@android.com Link: https://lkml.kernel.org/r/20190321003426.160260-5-joel@joelfernandes.org Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 2b452d68ab2ed..b52ed1ada0be8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -780,7 +780,7 @@ struct root_domain { * NULL-terminated list of performance domains intersecting with the * CPUs of the rd. Protected by RCU. 
*/ - struct perf_domain *pd; + struct perf_domain __rcu *pd; }; extern struct root_domain def_root_domain; -- GitLab From 71b47eaf6fb29b7f9722dc1646c26eb8a96e0a6d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 20 Mar 2019 21:38:39 +0800 Subject: [PATCH 0570/1861] sched/fair: Make sync_entity_load_avg() and remove_entity_load_avg() static Fix these sparse warnings:

kernel/sched/fair.c:3570:6: warning: symbol 'sync_entity_load_avg' was not declared. Should it be static?
kernel/sched/fair.c:3583:6: warning: symbol 'remove_entity_load_avg' was not declared. Should it be static?

Signed-off-by: YueHaibing Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190320133839.21392-1-yuehaibing@huawei.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40bd1e27b1b79..ed7f5f8107b71 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3567,7 +3567,7 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) * Synchronize entity load avg of dequeued entity without locking * the previous rq. */ -void sync_entity_load_avg(struct sched_entity *se) +static void sync_entity_load_avg(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 last_update_time; @@ -3580,7 +3580,7 @@ void sync_entity_load_avg(struct sched_entity *se) * Task first catches up with cfs_rq, and then subtract * itself from the cfs_rq (task must be off the queue now). */ -void remove_entity_load_avg(struct sched_entity *se) +static void remove_entity_load_avg(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); unsigned long flags; -- GitLab From ce856634af8cda3490947df8ac1ef5843e6356af Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 2 Apr 2019 09:57:13 -0700 Subject: [PATCH 0571/1861] HID: input: add mapping for Assistant key According to HUTRR89, usage 0x1cb from the consumer page was assigned to allow launching a desktop-aware assistant application, so let's add the mapping. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index b10b1922c5bdf..1fce0076e7dc4 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -998,6 +998,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x1b8: map_key_clear(KEY_VIDEO); break; case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; + case 0x1cb: map_key_clear(KEY_ASSISTANT); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; -- GitLab From b1ce45e86b81e8ce354c47a7440d263e4cd5dc56 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 14 Mar 2019 12:16:19 -0400 Subject: [PATCH 0572/1861] arm64/mm: fix kernel-doc comments Building a kernel with W=1 generates several warnings due to abuse of kernel-doc comments:

| arch/arm64/mm/numa.c:281: warning: Cannot understand  *
| on line 281 - I thought it was a doc line

Tidy up the comments to remove the warnings.
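For reference, the distinction the fix relies on is purely syntactic: scripts/kernel-doc parses any comment that opens with "/**" and then expects the documented name, @parameter lines and section layout to follow, while a plain "/*" comment is ignored. A minimal sketch of the convention the hunks below apply (condensed from the patch; the Return: line is illustrative):

/**
 * numa_add_memblk() - Set node id to memblk
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * Return: 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end);

/*
 * Internal helpers keep an ordinary comment; opening one with the
 * kernel-doc marker is exactly what triggers the W=1 warning.
 */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn);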
Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 06a6f264f2ddf..5202f63c29c9d 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -124,7 +124,7 @@ static void __init setup_node_to_cpumask_map(void) } /* - * Set the cpu to node and mem mapping + * Set the cpu to node and mem mapping */ void numa_store_cpu_info(unsigned int cpu) { @@ -200,7 +200,7 @@ void __init setup_per_cpu_areas(void) #endif /** - * numa_add_memblk - Set node id to memblk + * numa_add_memblk() - Set node id to memblk * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk * @end: End address of the new memblk @@ -223,7 +223,7 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) return ret; } -/** +/* * Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) @@ -257,7 +257,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; } -/** +/* * numa_free_distance * * The current table is freed. @@ -277,10 +277,8 @@ void __init numa_free_distance(void) numa_distance = NULL; } -/** - * +/* * Create a new NUMA distance table. - * */ static int __init numa_alloc_distance(void) { @@ -311,7 +309,7 @@ static int __init numa_alloc_distance(void) } /** - * numa_set_distance - Set inter node NUMA distance from node to node. + * numa_set_distance() - Set inter node NUMA distance from node to node. * @from: the 'from' node to set distance * @to: the 'to' node to set distance * @distance: NUMA distance @@ -321,7 +319,6 @@ static int __init numa_alloc_distance(void) * * If @from or @to is higher than the highest known node or lower than zero * or @distance doesn't make sense, the call is ignored. - * */ void __init numa_set_distance(int from, int to, int distance) { @@ -347,7 +344,7 @@ void __init numa_set_distance(int from, int to, int distance) numa_distance[from * numa_distance_cnt + to] = distance; } -/** +/* * Return NUMA distance @from to @to */ int __node_distance(int from, int to) @@ -422,13 +419,15 @@ out_free_distance: } /** - * dummy_numa_init - Fallback dummy NUMA init + * dummy_numa_init() - Fallback dummy NUMA init * * Used if there's no underlying NUMA architecture, NUMA initialization * fails, or NUMA is disabled on the command line. * * Must online at least one node (node 0) and add memory blocks that cover all * allowed memory. It is unlikely that this function fails. + * + * Return: 0 on success, -errno on failure. */ static int __init dummy_numa_init(void) { @@ -454,9 +453,9 @@ static int __init dummy_numa_init(void) } /** - * arm64_numa_init - Initialize NUMA + * arm64_numa_init() - Initialize NUMA * - * Try each configured NUMA initialization method until one succeeds. The + * Try each configured NUMA initialization method until one succeeds. The * last fallback is dummy single node config encomapssing whole memory. */ void __init arm64_numa_init(void) -- GitLab From 19d6242ece1f35aa004a7da9adf52e0ec18a854e Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 21 Mar 2019 12:21:25 +0800 Subject: [PATCH 0573/1861] arm64: setup min_low_pfn When debugging with CONFIG_PAGE_OWNER, I noticed that the min_low_pfn on arm64 is always zero and the page owner scanning has to start from zero. 
We have to loop a while before we see the first valid pfn. (see: read_page_owner()) Set up min_low_pfn to save some loops.

Before setting min_low_pfn:
[ 21.265602] min_low_pfn=0, *ppos=0
Page allocated via order 0, mask 0x100cca(GFP_HIGHUSER_MOVABLE)
PFN 262144 type Movable Block 512 type Movable Flags 0x8001e referenced|uptodate|dirty|lru|swapbacked)
 prep_new_page+0x13c/0x140
 get_page_from_freelist+0x254/0x1068
 __alloc_pages_nodemask+0xd4/0xcb8

After setting min_low_pfn:
[ 11.025787] min_low_pfn=262144, *ppos=0
Page allocated via order 0, mask 0x100cca(GFP_HIGHUSER_MOVABLE)
PFN 262144 type Movable Block 512 type Movable Flags 0x8001e referenced|uptodate|dirty|lru|swapbacked)
 prep_new_page+0x13c/0x140
 get_page_from_freelist+0x254/0x1068
 __alloc_pages_nodemask+0xd4/0xcb8
 shmem_alloc_page+0x7c/0xa0
 shmem_alloc_and_acct_page+0x124/0x1e8
 shmem_getpage_gfp.isra.7+0x118/0x878
 shmem_write_begin+0x38/0x68

Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index c29b17b520cd7..43fa75601b3d9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -440,6 +440,7 @@ void __init bootmem_init(void) early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT); max_pfn = max_low_pfn = max; + min_low_pfn = min; arm64_numa_init(); /* -- GitLab From 0f1bf7e39822476b2f921435cf990f67a61f5f92 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 1 Apr 2019 12:44:47 +0200 Subject: [PATCH 0574/1861] arm64/vdso: don't leak kernel addresses Since commit ad67b74d2469d9b8 ("printk: hash addresses printed with %p"), two obfuscated kernel pointers are printed at every boot:

vdso: 2 pages (1 code @ (____ptrval____), 1 data @ (____ptrval____))

Remove the print completely, as it's useless without the addresses. Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Acked-by: Mark Rutland Signed-off-by: Matteo Croce Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 2d419006ad433..9fb0c0c95a85c 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -146,8 +146,6 @@ static int __init vdso_init(void) } vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), -- GitLab From 7048a5973eb1d6a747b516334c052d92d8ed7ab4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Apr 2019 13:36:54 +0100 Subject: [PATCH 0575/1861] arm64: mm: Make show_pte() a static function show_pte() doesn't have any external callers, so make it static.
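The shape of that change, as a hedged sketch rather than the exact hunks (the body is elided; only the linkage changes):

/* Before: asm/system_misc.h exported the symbol to every includer. */
extern void show_pte(unsigned long addr);

/*
 * After: the extern declaration is dropped and the definition in
 * arch/arm64/mm/fault.c gains internal linkage, so the symbol no
 * longer escapes that translation unit.
 */
static void show_pte(unsigned long addr)
{
	/* ... dump the page tables associated with 'addr' ... */
}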
Signed-off-by: Will Deacon --- arch/arm64/include/asm/system_misc.h | 1 - arch/arm64/mm/fault.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 32693f34f4317..fca95424e8735 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -41,7 +41,6 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, int sig, int code, const char *name); struct mm_struct; -extern void show_pte(unsigned long addr); extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1a7e92ab69ebb..4f343e603925d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -148,7 +148,7 @@ static inline bool is_ttbr1_addr(unsigned long addr) /* * Dump out the page tables associated with 'addr' in the currently active mm. */ -void show_pte(unsigned long addr) +static void show_pte(unsigned long addr) { struct mm_struct *mm; pgd_t *pgdp; -- GitLab From 92606ec9285fb84cd9b5943df23f07d741384bfc Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Tue, 5 Mar 2019 19:34:05 +0800 Subject: [PATCH 0576/1861] arm64: cpu_ops: fix a leaked reference by adding missing of_node_put The call to of_get_next_child returns a node pointer with its refcount incremented, thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warning:

./arch/arm64/kernel/cpu_ops.c:102:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 69, but without a corresponding object release within this function.

Signed-off-by: Wen Yang Reviewed-by: Florian Fainelli Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index ea001241bdd47..00f8b8612b69f 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -85,6 +85,7 @@ static const char *__init cpu_read_enable_method(int cpu) pr_err("%pOF: missing enable-method property\n", dn); } + of_node_put(dn); } else { enable_method = acpi_get_enable_method(cpu); if (!enable_method) { -- GitLab From 46ad0840b1584b92b5ff2cc3ed0b011dd6b8e0f1 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:06 -0400 Subject: [PATCH 0577/1861] locking/rwsem: Remove arch specific rwsem files As the generic rwsem-xadd code uses the appropriate acquire and release versions of the atomic operations, the arch specific rwsem.h files will not be much faster than the generic code as long as the atomic functions are properly implemented. So we can remove those arch specific rwsem.h files and stop building asm/rwsem.h to reduce the maintenance effort. Currently, only x86, alpha and ia64 have implemented architecture specific fast paths. I don't have access to alpha and ia64 systems for testing, but they are legacy systems that are not likely to be updated to the latest kernel anyway.
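For context, the generic fast path that replaces those per-arch files is plain atomic arithmetic on sem->count. A minimal sketch of the read-lock side, mirroring the asm-generic code that the diff below folds into kernel/locking/rwsem.h:

static inline void __down_read(struct rw_semaphore *sem)
{
	/*
	 * A reader adds ACTIVE_BIAS (+1). The result is <= 0 only when
	 * a writer is active or waiting (the write bias makes the count
	 * negative), so only contended acquisitions take the slow path.
	 */
	if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
		rwsem_down_read_failed(sem);
}

Because the acquire/release ordering lives in the atomic_long_*() helpers themselves, any architecture that implements those correctly gets an equally fast lock without maintaining its own rwsem.h.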
By using a rwsem microbenchmark, the total locking rates on a 4-socket 56-core 112-thread x86-64 system before and after the patch were as follows (mixed means equal # of read and write locks):

                   Before Patch               After Patch
  # of Threads   wlock   rlock   mixed     wlock   rlock   mixed
  ------------   -----   -----   -----     -----   -----   -----
       1        29,201  30,143  29,458    28,615  30,172  29,201
       2         6,807  13,299   1,171     7,725  15,025   1,804
       4         6,504  12,755   1,520     7,127  14,286   1,345
       8         6,762  13,412     764     6,826  13,652     726
      16         6,693  15,408     662     6,599  15,938     626
      32         6,145  15,286     496     5,549  15,487     511
      64         5,812  15,495      60     5,858  15,572      60

There were some run-to-run variations for the multi-thread tests. For x86-64, using the generic C code fast path seems to be a little bit faster than the assembly version under low lock contention. Looking at the assembly version of the fast paths, there are assembly to/from C code wrappers that save and restore all the callee-clobbered registers (7 registers on x86-64). The assembly generated from the generic C code doesn't need to do that. That may explain the slight performance gain here. The generic asm rwsem.h can also be merged into kernel/locking/rwsem.h with no code change, as no code other than that under kernel/locking needs to access the internal rwsem macros and functions. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-2-longman@redhat.com Signed-off-by: Ingo Molnar --- MAINTAINERS | 1 - arch/alpha/include/asm/rwsem.h | 211 ---------------------------- arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/rwsem.h | 172 ----------------------- arch/powerpc/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/x86/include/asm/rwsem.h | 237 -------------------------------- arch/x86/lib/Makefile | 1 - arch/x86/lib/rwsem.S | 156 --------------------- arch/x86/um/Makefile | 4 +- arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/rwsem.h | 140 ------------------- include/linux/rwsem.h | 4 +- kernel/locking/percpu-rwsem.c | 2 + kernel/locking/rwsem.h | 130 ++++++++++++++++++ 19 files changed, 134 insertions(+), 932 deletions(-) delete mode 100644 arch/alpha/include/asm/rwsem.h delete mode 100644 arch/ia64/include/asm/rwsem.h delete mode 100644 arch/x86/include/asm/rwsem.h delete mode 100644 arch/x86/lib/rwsem.S delete mode 100644 include/asm-generic/rwsem.h diff --git a/MAINTAINERS b/MAINTAINERS index 43b36dbed48e7..8876f5de77386 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9098,7 +9098,6 @@ F: arch/*/include/asm/spinlock*.h F: include/linux/rwlock*.h F: include/linux/mutex*.h F: include/linux/rwsem*.h -F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h F: lib/locking*.[ch] F: kernel/locking/ diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h deleted file mode 100644 index
cf8fc8f9a2ed5..0000000000000 --- a/arch/alpha/include/asm/rwsem.h +++ /dev/null @@ -1,211 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ALPHA_RWSEM_H -#define _ALPHA_RWSEM_H - -/* - * Written by Ivan Kokshaysky , 2001. - * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h - */ - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#include - -#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L -#define RWSEM_ACTIVE_BIAS 0x0000000000000001L -#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL -#define RWSEM_WAITING_BIAS (-0x0000000100000000L) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -static inline int ___down_read(struct rw_semaphore *sem) -{ - long oldcount; -#ifndef CONFIG_SMP - oldcount = sem->count.counter; - sem->count.counter += RWSEM_ACTIVE_READ_BIAS; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - " mb\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) - :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); -#endif - return (oldcount < 0); -} - -static inline void __down_read(struct rw_semaphore *sem) -{ - if (unlikely(___down_read(sem))) - rwsem_down_read_failed(sem); -} - -static inline int __down_read_killable(struct rw_semaphore *sem) -{ - if (unlikely(___down_read(sem))) - if (IS_ERR(rwsem_down_read_failed_killable(sem))) - return -EINTR; - - return 0; -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - long old, new, res; - - res = atomic_long_read(&sem->count); - do { - new = res + RWSEM_ACTIVE_READ_BIAS; - if (new <= 0) - break; - old = res; - res = atomic_long_cmpxchg(&sem->count, old, new); - } while (res != old); - return res >= 0 ? 
1 : 0; -} - -static inline long ___down_write(struct rw_semaphore *sem) -{ - long oldcount; -#ifndef CONFIG_SMP - oldcount = sem->count.counter; - sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - " mb\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) - :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); -#endif - return oldcount; -} - -static inline void __down_write(struct rw_semaphore *sem) -{ - if (unlikely(___down_write(sem))) - rwsem_down_write_failed(sem); -} - -static inline int __down_write_killable(struct rw_semaphore *sem) -{ - if (unlikely(___down_write(sem))) { - if (IS_ERR(rwsem_down_write_failed_killable(sem))) - return -EINTR; - } - - return 0; -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - if (ret == RWSEM_UNLOCKED_VALUE) - return 1; - return 0; -} - -static inline void __up_read(struct rw_semaphore *sem) -{ - long oldcount; -#ifndef CONFIG_SMP - oldcount = sem->count.counter; - sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; -#else - long temp; - __asm__ __volatile__( - " mb\n" - "1: ldq_l %0,%1\n" - " subq %0,%3,%2\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) - :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); -#endif - if (unlikely(oldcount < 0)) - if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0) - rwsem_wake(sem); -} - -static inline void __up_write(struct rw_semaphore *sem) -{ - long count; -#ifndef CONFIG_SMP - sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count.counter; -#else - long temp; - __asm__ __volatile__( - " mb\n" - "1: ldq_l %0,%1\n" - " subq %0,%3,%2\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - " subq %0,%3,%0\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (count), "=m" (sem->count), "=&r" (temp) - :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); -#endif - if (unlikely(count)) - if ((int)count == 0) - rwsem_wake(sem); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - long oldcount; -#ifndef CONFIG_SMP - oldcount = sem->count.counter; - sem->count.counter -= RWSEM_WAITING_BIAS; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - " mb\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) - :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory"); -#endif - if (unlikely(oldcount < 0)) - rwsem_downgrade_wake(sem); -} - -#endif /* __KERNEL__ */ -#endif /* _ALPHA_RWSEM_H */ diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index a8a4eb7f6dae0..8fb51b7bf1d58 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += mm-arch-hooks.h generic-y += msi.h generic-y += parport.h generic-y += preempt.h -generic-y += rwsem.h generic-y += seccomp.h generic-y += segment.h generic-y += serial.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 1e17ea5c372b2..60a933b070019 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -16,7 
+16,6 @@ generic-y += mm-arch-hooks.h generic-y += msi.h generic-y += qrwlock.h generic-y += qspinlock.h -generic-y += rwsem.h generic-y += segment.h generic-y += serial.h generic-y += set_memory.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index d046e8ccdf786..3ff5f297acda7 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -27,7 +27,6 @@ generic-y += mm-arch-hooks.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h -generic-y += rwsem.h generic-y += sections.h generic-y += segment.h generic-y += serial.h diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h deleted file mode 100644 index 917910607e0ea..0000000000000 --- a/arch/ia64/include/asm/rwsem.h +++ /dev/null @@ -1,172 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * R/W semaphores for ia64 - * - * Copyright (C) 2003 Ken Chen - * Copyright (C) 2003 Asit Mallick - * Copyright (C) 2005 Christoph Lameter - * - * Based on asm-i386/rwsem.h and other architecture implementation. - * - * The MSW of the count is the negated number of active writers and - * waiting lockers, and the LSW is the total number of active locks. - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for - * the case of an uncontended lock. Readers increment by 1 and see a positive - * value when uncontended, negative if there are writers (and maybe) readers - * waiting (in which case it goes to sleep). - */ - -#ifndef _ASM_IA64_RWSEM_H -#define _ASM_IA64_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "Please don't include directly, use instead." -#endif - -#include - -#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) -#define RWSEM_ACTIVE_BIAS (1L) -#define RWSEM_ACTIVE_MASK (0xffffffffL) -#define RWSEM_WAITING_BIAS (-0x100000000L) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -static inline int -___down_read (struct rw_semaphore *sem) -{ - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); - - return (result < 0); -} - -static inline void -__down_read (struct rw_semaphore *sem) -{ - if (___down_read(sem)) - rwsem_down_read_failed(sem); -} - -static inline int -__down_read_killable (struct rw_semaphore *sem) -{ - if (___down_read(sem)) - if (IS_ERR(rwsem_down_read_failed_killable(sem))) - return -EINTR; - - return 0; -} - -/* - * lock for writing - */ -static inline long -___down_write (struct rw_semaphore *sem) -{ - long old, new; - - do { - old = atomic_long_read(&sem->count); - new = old + RWSEM_ACTIVE_WRITE_BIAS; - } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); - - return old; -} - -static inline void -__down_write (struct rw_semaphore *sem) -{ - if (___down_write(sem)) - rwsem_down_write_failed(sem); -} - -static inline int -__down_write_killable (struct rw_semaphore *sem) -{ - if (___down_write(sem)) { - if (IS_ERR(rwsem_down_write_failed_killable(sem))) - return -EINTR; - } - - return 0; -} - -/* - * unlock after reading - */ -static inline void -__up_read (struct rw_semaphore *sem) -{ - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); - - if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void -__up_write (struct rw_semaphore *sem) -{ - long old, new; - - do { - old = 
atomic_long_read(&sem->count); - new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); - - if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline int -__down_read_trylock (struct rw_semaphore *sem) -{ - long tmp; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { - return 1; - } - } - return 0; -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline int -__down_write_trylock (struct rw_semaphore *sem) -{ - long tmp = atomic_long_cmpxchg_acquire(&sem->count, - RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); - return tmp == RWSEM_UNLOCKED_VALUE; -} - -/* - * downgrade write lock to read lock - */ -static inline void -__downgrade_write (struct rw_semaphore *sem) -{ - long old, new; - - do { - old = atomic_long_read(&sem->count); - new = old - RWSEM_WAITING_BIAS; - } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); - - if (old < 0) - rwsem_downgrade_wake(sem); -} - -#endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index a0c132bedfae8..36bda391e549f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -8,6 +8,5 @@ generic-y += irq_regs.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += rwsem.h generic-y += vtime.h generic-y += msi.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 12d77cb11fe5a..d5fadefea33ca 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += rwsem.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += word-at-a-time.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7bf2cb680d328..73fff39a0122f 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h -generic-y += rwsem.h generic-y += serial.h generic-y += sizes.h generic-y += trace_clock.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index a22cfd5c0ee86..2ca3200d3616a 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -18,7 +18,6 @@ generic-y += mm-arch-hooks.h generic-y += module.h generic-y += msi.h generic-y += preempt.h -generic-y += rwsem.h generic-y += serial.h generic-y += trace_clock.h generic-y += word-at-a-time.h diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h deleted file mode 100644 index 4c25cf6caefa1..0000000000000 --- a/arch/x86/include/asm/rwsem.h +++ /dev/null @@ -1,237 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+ - * - * Written by David Howells (dhowells@redhat.com). - * - * Derived from asm-x86/semaphore.h - * - * - * The MSW of the count is the negated number of active writers and waiting - * lockers, and the LSW is the total number of active locks - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an - * uncontended lock. 
This can be determined because XADD returns the old value. - * Readers increment by 1 and see a positive value when uncontended, negative - * if there are writers (and maybe) readers waiting (in which case it goes to - * sleep). - * - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can - * be extended to 65534 by manually checking the whole MSW rather than relying - * on the S flag. - * - * The value of ACTIVE_BIAS supports up to 65535 active processes. - * - * This should be totally fair - if anything is waiting, a process that wants a - * lock will go to the back of the queue. When the currently active lock is - * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consecutive readers at the - * front, then they'll all be woken up, but no other readers will be. - */ - -#ifndef _ASM_X86_RWSEM_H -#define _ASM_X86_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ -#include - -/* - * The bias values and the counter type limits the number of - * potential readers/writers to 32767 for 32 bits and 2147483647 - * for 64 bits. - */ - -#ifdef CONFIG_X86_64 -# define RWSEM_ACTIVE_MASK 0xffffffffL -#else -# define RWSEM_ACTIVE_MASK 0x0000ffffL -#endif - -#define RWSEM_UNLOCKED_VALUE 0x00000000L -#define RWSEM_ACTIVE_BIAS 0x00000001L -#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -#define ____down_read(sem, slow_path) \ -({ \ - struct rw_semaphore* ret; \ - asm volatile("# beginning down_read\n\t" \ - LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \ - /* adds 0x00000001 */ \ - " jns 1f\n" \ - " call " slow_path "\n" \ - "1:\n\t" \ - "# ending down_read\n\t" \ - : "+m" (sem->count), "=a" (ret), \ - ASM_CALL_CONSTRAINT \ - : [sem] "a" (sem) \ - : "memory", "cc"); \ - ret; \ -}) - -static inline void __down_read(struct rw_semaphore *sem) -{ - ____down_read(sem, "call_rwsem_down_read_failed"); -} - -static inline int __down_read_killable(struct rw_semaphore *sem) -{ - if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable"))) - return -EINTR; - return 0; -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline bool __down_read_trylock(struct rw_semaphore *sem) -{ - long result, tmp; - asm volatile("# beginning __down_read_trylock\n\t" - " mov %[count],%[result]\n\t" - "1:\n\t" - " mov %[result],%[tmp]\n\t" - " add %[inc],%[tmp]\n\t" - " jle 2f\n\t" - LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" - : [count] "+m" (sem->count), [result] "=&a" (result), - [tmp] "=&r" (tmp) - : [inc] "i" (RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); - return result >= 0; -} - -/* - * lock for writing - */ -#define ____down_write(sem, slow_path) \ -({ \ - long tmp; \ - struct rw_semaphore* ret; \ - \ - asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \ - /* adds 0xffff0001, returns the old value */ \ - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ - /* was the active mask 0 before? 
*/\ - " jz 1f\n" \ - " call " slow_path "\n" \ - "1:\n" \ - "# ending down_write" \ - : "+m" (sem->count), [tmp] "=d" (tmp), \ - "=a" (ret), ASM_CALL_CONSTRAINT \ - : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \ - : "memory", "cc"); \ - ret; \ -}) - -static inline void __down_write(struct rw_semaphore *sem) -{ - ____down_write(sem, "call_rwsem_down_write_failed"); -} - -static inline int __down_write_killable(struct rw_semaphore *sem) -{ - if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable"))) - return -EINTR; - - return 0; -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline bool __down_write_trylock(struct rw_semaphore *sem) -{ - bool result; - long tmp0, tmp1; - asm volatile("# beginning __down_write_trylock\n\t" - " mov %[count],%[tmp0]\n\t" - "1:\n\t" - " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" - /* was the active mask 0 before? */ - " jnz 2f\n\t" - " mov %[tmp0],%[tmp1]\n\t" - " add %[inc],%[tmp1]\n\t" - LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t" - " jnz 1b\n\t" - "2:\n\t" - CC_SET(e) - "# ending __down_write_trylock\n\t" - : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0), - [tmp1] "=&r" (tmp1), CC_OUT(e) (result) - : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS) - : "memory"); - return result; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - long tmp; - asm volatile("# beginning __up_read\n\t" - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" - /* subtracts 1, returns the old value */ - " jns 1f\n\t" - " call call_rwsem_wake\n" /* expects old value in %edx */ - "1:\n" - "# ending __up_read\n" - : "+m" (sem->count), [tmp] "=d" (tmp) - : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - long tmp; - asm volatile("# beginning __up_write\n\t" - LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" - /* subtracts 0xffff0001, returns the old value */ - " jns 1f\n\t" - " call call_rwsem_wake\n" /* expects old value in %edx */ - "1:\n\t" - "# ending __up_write\n" - : "+m" (sem->count), [tmp] "=d" (tmp) - : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - asm volatile("# beginning __downgrade_write\n\t" - LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t" - /* - * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) - * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) - */ - " jns 1f\n\t" - " call call_rwsem_downgrade_wake\n" - "1:\n\t" - "# ending __downgrade_write\n" - : "+m" (sem->count) - : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS) - : "memory", "cc"); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 140e61843a079..986652064b151 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o lib-y := delay.o misc.o cmdline.o cpu.o lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S deleted file mode 100644 index dc2ab6ea67686..0000000000000 --- a/arch/x86/lib/rwsem.S +++ /dev/null @@ -1,156 +0,0 @@ 
-/* - * x86 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise - */ - -#include -#include -#include - -#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) -#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) - -#ifdef CONFIG_X86_32 - -/* - * The semaphore operations have a special calling sequence that - * allow us to do a simpler in-line version of them. These routines - * need to convert that sequence back into the C sequence when - * there is contention on the semaphore. - * - * %eax contains the semaphore pointer on entry. Save the C-clobbered - * registers (%eax, %edx and %ecx) except %eax which is either a return - * value or just gets clobbered. Same is true for %edx so make sure GCC - * reloads it after the slow path, by making it hold a temporary, for - * example see ____down_write(). - */ - -#define save_common_regs \ - pushl %ecx - -#define restore_common_regs \ - popl %ecx - - /* Avoid uglifying the argument copying x86-64 needs to do. */ - .macro movq src, dst - .endm - -#else - -/* - * x86-64 rwsem wrappers - * - * This interfaces the inline asm code to the slow-path - * C routines. We need to save the call-clobbered regs - * that the asm does not mark as clobbered, and move the - * argument from %rax to %rdi. - * - * NOTE! We don't need to save %rax, because the functions - * will always return the semaphore pointer in %rax (which - * is also the input argument to these helpers) - * - * The following can clobber %rdx because the asm clobbers it: - * call_rwsem_down_write_failed - * call_rwsem_wake - * but %rdi, %rsi, %rcx, %r8-r11 always need saving. 
- */ - -#define save_common_regs \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rcx; \ - pushq %r8; \ - pushq %r9; \ - pushq %r10; \ - pushq %r11 - -#define restore_common_regs \ - popq %r11; \ - popq %r10; \ - popq %r9; \ - popq %r8; \ - popq %rcx; \ - popq %rsi; \ - popq %rdi - -#endif - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_down_read_failed) - FRAME_BEGIN - save_common_regs - __ASM_SIZE(push,) %__ASM_REG(dx) - movq %rax,%rdi - call rwsem_down_read_failed - __ASM_SIZE(pop,) %__ASM_REG(dx) - restore_common_regs - FRAME_END - ret -ENDPROC(call_rwsem_down_read_failed) - -ENTRY(call_rwsem_down_read_failed_killable) - FRAME_BEGIN - save_common_regs - __ASM_SIZE(push,) %__ASM_REG(dx) - movq %rax,%rdi - call rwsem_down_read_failed_killable - __ASM_SIZE(pop,) %__ASM_REG(dx) - restore_common_regs - FRAME_END - ret -ENDPROC(call_rwsem_down_read_failed_killable) - -ENTRY(call_rwsem_down_write_failed) - FRAME_BEGIN - save_common_regs - movq %rax,%rdi - call rwsem_down_write_failed - restore_common_regs - FRAME_END - ret -ENDPROC(call_rwsem_down_write_failed) - -ENTRY(call_rwsem_down_write_failed_killable) - FRAME_BEGIN - save_common_regs - movq %rax,%rdi - call rwsem_down_write_failed_killable - restore_common_regs - FRAME_END - ret -ENDPROC(call_rwsem_down_write_failed_killable) - -ENTRY(call_rwsem_wake) - FRAME_BEGIN - /* do nothing if still outstanding active readers */ - __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) - jnz 1f - save_common_regs - movq %rax,%rdi - call rwsem_wake - restore_common_regs -1: FRAME_END - ret -ENDPROC(call_rwsem_wake) - -ENTRY(call_rwsem_downgrade_wake) - FRAME_BEGIN - save_common_regs - __ASM_SIZE(push,) %__ASM_REG(dx) - movq %rax,%rdi - call rwsem_downgrade_wake - __ASM_SIZE(pop,) %__ASM_REG(dx) - restore_common_regs - FRAME_END - ret -ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 2d686ae54681d..33c51c064c77e 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o -subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o else obj-y += syscalls_64.o vdso/ -subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \ - ../lib/rwsem.o +subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o endif diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 3843198e03d4b..4148090cafb00 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -25,7 +25,6 @@ generic-y += percpu.h generic-y += preempt.h generic-y += qrwlock.h generic-y += qspinlock.h -generic-y += rwsem.h generic-y += sections.h generic-y += socket.h generic-y += topology.h diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h deleted file mode 100644 index 93e67a055a4d8..0000000000000 --- a/include/asm-generic/rwsem.h +++ /dev/null @@ -1,140 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_RWSEM_H -#define _ASM_GENERIC_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "Please don't include directly, use instead." -#endif - -#ifdef __KERNEL__ - -/* - * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. - * Adapted largely from include/asm-i386/rwsem.h - * by Paul Mackerras . 
- */ - -/* - * the semaphore definition - */ -#ifdef CONFIG_64BIT -# define RWSEM_ACTIVE_MASK 0xffffffffL -#else -# define RWSEM_ACTIVE_MASK 0x0000ffffL -#endif - -#define RWSEM_UNLOCKED_VALUE 0x00000000L -#define RWSEM_ACTIVE_BIAS 0x00000001L -#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) - rwsem_down_read_failed(sem); -} - -static inline int __down_read_killable(struct rw_semaphore *sem) -{ - if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { - if (IS_ERR(rwsem_down_read_failed_killable(sem))) - return -EINTR; - } - - return 0; -} - -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - long tmp; - - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { - return 1; - } - } - return 0; -} - -/* - * lock for writing - */ -static inline void __down_write(struct rw_semaphore *sem) -{ - long tmp; - - tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - &sem->count); - if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) - rwsem_down_write_failed(sem); -} - -static inline int __down_write_killable(struct rw_semaphore *sem) -{ - long tmp; - - tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, - &sem->count); - if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) - if (IS_ERR(rwsem_down_write_failed_killable(sem))) - return -EINTR; - return 0; -} - -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - long tmp; - - tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - return tmp == RWSEM_UNLOCKED_VALUE; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - long tmp; - - tmp = atomic_long_dec_return_release(&sem->count); - if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, - &sem->count) < 0)) - rwsem_wake(sem); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - long tmp; - - /* - * When downgrading from exclusive to shared ownership, - * anything inside the write-locked region cannot leak - * into the read side. In contrast, anything in the - * read-locked region is ok to be re-ordered into the - * write side. As such, rely on RELEASE semantics. 
- */ - tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); - if (tmp < 0) - rwsem_downgrade_wake(sem); -} - -#endif /* __KERNEL__ */ -#endif /* _ASM_GENERIC_RWSEM_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 67dbb57508b1f..6e56006b2cb6b 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -57,15 +57,13 @@ extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); -/* Include the arch specific part */ -#include - /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { return atomic_long_read(&sem->count) != 0; } +#define RWSEM_UNLOCKED_VALUE 0L #define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) #endif diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 883cf1b92d908..f17dad99eec8b 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -7,6 +7,8 @@ #include #include +#include "rwsem.h" + int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, const char *name, struct lock_class_key *rwsem_key) { diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index bad2bca0268b1..067e265fa5c1d 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -32,6 +32,26 @@ # define DEBUG_RWSEMS_WARN_ON(c) #endif +/* + * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. + * Adapted largely from include/asm-i386/rwsem.h + * by Paul Mackerras . + */ + +/* + * the semaphore definition + */ +#ifdef CONFIG_64BIT +# define RWSEM_ACTIVE_MASK 0xffffffffL +#else +# define RWSEM_ACTIVE_MASK 0x0000ffffL +#endif + +#define RWSEM_ACTIVE_BIAS 0x00000001L +#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * All writes to owner are protected by WRITE_ONCE() to make sure that @@ -132,3 +152,113 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { } #endif + +#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) + rwsem_down_read_failed(sem); +} + +static inline int __down_read_killable(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { + if (IS_ERR(rwsem_down_read_failed_killable(sem))) + return -EINTR; + } + + return 0; +} + +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + long tmp; + + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { + return 1; + } + } + return 0; +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + return 0; +} + +static inline int 
__down_write_trylock(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_dec_return_release(&sem->count); + if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count) < 0)) + rwsem_wake(sem); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + long tmp; + + /* + * When downgrading from exclusive to shared ownership, + * anything inside the write-locked region cannot leak + * into the read side. In contrast, anything in the + * read-locked region is ok to be re-ordered into the + * write side. As such, rely on RELEASE semantics. + */ + tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); + if (tmp < 0) + rwsem_downgrade_wake(sem); +} + +#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */ -- GitLab From 390a0c62c23cb026cd4664a66f6f45fed3a215f6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:07 -0400 Subject: [PATCH 0578/1861] locking/rwsem: Remove rwsem-spinlock.c & use rwsem-xadd.c for all archs Currently, we have two different implementations of rwsem:

 1) CONFIG_RWSEM_GENERIC_SPINLOCK (rwsem-spinlock.c)
 2) CONFIG_RWSEM_XCHGADD_ALGORITHM (rwsem-xadd.c)

As we are going to use a single generic implementation (rwsem-xadd.c) and no architecture-specific code will be needed, there is no point in keeping two different implementations of rwsem. In most cases, the performance of rwsem-spinlock.c will be worse. It also doesn't get all the performance tuning and optimizations that had been implemented in rwsem-xadd.c over the years. For simplification, we are going to remove rwsem-spinlock.c and make all architectures use a single implementation of rwsem - rwsem-xadd.c. All references to RWSEM_GENERIC_SPINLOCK and RWSEM_XCHGADD_ALGORITHM in the code are removed. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E.
McKenney Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-3-longman@redhat.com Signed-off-by: Ingo Molnar --- arch/alpha/Kconfig | 7 - arch/arc/Kconfig | 3 - arch/arm/Kconfig | 4 - arch/arm64/Kconfig | 3 - arch/c6x/Kconfig | 3 - arch/csky/Kconfig | 3 - arch/h8300/Kconfig | 3 - arch/hexagon/Kconfig | 6 - arch/ia64/Kconfig | 4 - arch/m68k/Kconfig | 7 - arch/microblaze/Kconfig | 6 - arch/mips/Kconfig | 7 - arch/nds32/Kconfig | 3 - arch/nios2/Kconfig | 3 - arch/openrisc/Kconfig | 6 - arch/parisc/Kconfig | 6 - arch/powerpc/Kconfig | 7 - arch/riscv/Kconfig | 3 - arch/s390/Kconfig | 6 - arch/sh/Kconfig | 6 - arch/sparc/Kconfig | 8 - arch/unicore32/Kconfig | 6 - arch/x86/Kconfig | 3 - arch/x86/um/Kconfig | 6 - arch/xtensa/Kconfig | 3 - include/linux/rwsem-spinlock.h | 47 ----- include/linux/rwsem.h | 5 - kernel/Kconfig.locks | 2 +- kernel/locking/Makefile | 4 +- kernel/locking/rwsem-spinlock.c | 339 -------------------------------- kernel/locking/rwsem.h | 3 - 31 files changed, 2 insertions(+), 520 deletions(-) delete mode 100644 include/linux/rwsem-spinlock.h delete mode 100644 kernel/locking/rwsem-spinlock.c diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 584a6e1148539..27c871227eee9 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -49,13 +49,6 @@ config MMU bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config ARCH_HAS_ILOG2_U32 bool default n diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index c781e45d1d995..23e063df5d2cf 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER config GENERIC_CSUM def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config ARCH_DISCONTIGMEM_ENABLE def_bool n diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 850b4805e2d17..c8b03e1c6c2a4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT bool default !CPU_V7M -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de1..c62b9db2b5e88 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -237,9 +237,6 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index e5cd3c5f8399c..ed92b5840c0a8 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -27,9 +27,6 @@ config MMU config FPU def_bool n -config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 725a115759c97..6555d17811322 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -92,9 +92,6 @@ config GENERIC_HWEIGHT config MMU def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config STACKTRACE_SUPPORT def_bool y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index c071da34e0817..61c01db6c2923 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -27,9 +27,6 @@ config H8300 config CPU_BIG_ENDIAN def_bool y 
-config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index ac441680dcc06..3e54a53208d58 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -65,12 +65,6 @@ config GENERIC_CSUM config GENERIC_IRQ_PROBE def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool n - -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8d7396bd17903..73a26f04644e3 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT config GENERIC_LOCKBREAK def_bool n -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config HUGETLB_PAGE_SIZE_VARIABLE bool depends on HUGETLB_PAGE diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index b54206408f91b..f5661f48019c6 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -32,13 +32,6 @@ config M68K config CPU_BIG_ENDIAN def_bool y -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index a51b965b3b823..a7a9a9d59f65e 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -58,15 +58,9 @@ config CPU_LITTLE_ENDIAN endchoice -config RWSEM_GENERIC_SPINLOCK - def_bool y - config ZONE_DMA def_bool y -config RWSEM_XCHGADD_ALGORITHM - bool - config ARCH_HAS_ILOG2_U32 def_bool n diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 4a5f5b0ee9a9e..b9c48b27162dc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1037,13 +1037,6 @@ source "arch/mips/paravirt/Kconfig" endmenu -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_HWEIGHT bool default y diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index addb7f5f52645..55559ca0efe40 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK def_bool y depends on PREEMPT -config RWSEM_GENERIC_SPINLOCK - def_bool y - config TRACE_IRQFLAGS_SUPPORT def_bool y diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 4ef15a61b7bc3..56685fd45ed00 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -40,9 +40,6 @@ config NO_IOPORT_MAP config FPU def_bool n -config RWSEM_GENERIC_SPINLOCK - def_bool y - config TRACE_IRQFLAGS_SUPPORT def_bool n diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index a5e361fbb75a0..683511b8c9df8 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -43,12 +43,6 @@ config CPU_BIG_ENDIAN config MMU def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - -config RWSEM_XCHGADD_ALGORITHM - def_bool n - config GENERIC_HWEIGHT def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index c8e621296092d..f1ed8ddfe4869 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -75,12 +75,6 @@ config GENERIC_LOCKBREAK default y depends on SMP && PREEMPT -config RWSEM_GENERIC_SPINLOCK - def_bool y - -config RWSEM_XCHGADD_ALGORITHM - bool - config ARCH_HAS_ILOG2_U32 bool default n diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d0be82c30619..e5dd6aafaf68e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_LOCKBREAK bool default y diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index eb56c82d8aa14..0582260fb6c20 100644 
--- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -69,9 +69,6 @@ config STACKTRACE_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b6e3d0653002a..c9300b4371956 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config ARCH_HAS_ILOG2_U32 def_bool n diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b1c91ea9a958e..0be08d586d40c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -90,12 +90,6 @@ config ARCH_DEFCONFIG default "arch/sh/configs/shx3_defconfig" if SUPERH32 default "arch/sh/configs/cayman_defconfig" if SUPERH64 -config RWSEM_GENERIC_SPINLOCK - def_bool y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_BUG def_bool y depends on BUG && SUPERH32 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 40f8f4f73fe8f..16b6202378161 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -191,14 +191,6 @@ config NR_CPUS source "kernel/Kconfig.hz" -config RWSEM_GENERIC_SPINLOCK - bool - default y if SPARC32 - -config RWSEM_XCHGADD_ALGORITHM - bool - default y if SPARC64 - config GENERIC_HWEIGHT bool default y diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 817d82608712a..0d5869c9bf039 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -38,12 +38,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config RWSEM_GENERIC_SPINLOCK - def_bool y - -config RWSEM_XCHGADD_ALGORITHM - bool - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19c..84b184e016cd4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -268,9 +268,6 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index a9e80e44178c7..a8985e1f7432f 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -32,12 +32,6 @@ config ARCH_DEFCONFIG default "arch/um/configs/i386_defconfig" if X86_32 default "arch/um/configs/x86_64_defconfig" if X86_64 -config RWSEM_XCHGADD_ALGORITHM - def_bool 64BIT - -config RWSEM_GENERIC_SPINLOCK - def_bool !RWSEM_XCHGADD_ALGORITHM - config 3_LEVEL_PGTABLES bool "Three-level pagetables" if !64BIT default 64BIT diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 4b9aafe766c58..35c8d91e61069 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -46,9 +46,6 @@ config XTENSA with reasonable minimum requirements. The Xtensa Linux project has a home page at . -config RWSEM_XCHGADD_ALGORITHM - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h deleted file mode 100644 index e47568363e5ef..0000000000000 --- a/include/linux/rwsem-spinlock.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* rwsem-spinlock.h: fallback C implementation - * - * Copyright (c) 2001 David Howells (dhowells@redhat.com). 
- * - Derived partially from ideas by Andrea Arcangeli - * - Derived also from comments by Linus - */ - -#ifndef _LINUX_RWSEM_SPINLOCK_H -#define _LINUX_RWSEM_SPINLOCK_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ -/* - * the rw-semaphore definition - * - if count is 0 then there are no active readers or writers - * - if count is +ve then that is the number of active readers - * - if count is -1 then there is one active writer - * - if wait_list is not empty, then there are processes waiting for the semaphore - */ -struct rw_semaphore { - __s32 count; - raw_spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#define RWSEM_UNLOCKED_VALUE 0x00000000 - -extern void __down_read(struct rw_semaphore *sem); -extern int __must_check __down_read_killable(struct rw_semaphore *sem); -extern int __down_read_trylock(struct rw_semaphore *sem); -extern void __down_write(struct rw_semaphore *sem); -extern int __must_check __down_write_killable(struct rw_semaphore *sem); -extern int __down_write_trylock(struct rw_semaphore *sem); -extern void __up_read(struct rw_semaphore *sem); -extern void __up_write(struct rw_semaphore *sem); -extern void __downgrade_write(struct rw_semaphore *sem); -extern int rwsem_is_locked(struct rw_semaphore *sem); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_RWSEM_SPINLOCK_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 6e56006b2cb6b..0fc41062c6495 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -22,10 +22,6 @@ struct rw_semaphore; -#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK -#include /* use a generic implementation */ -#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE -#else /* All arch specific implementations share the same struct */ struct rw_semaphore { atomic_long_t count; @@ -65,7 +61,6 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #define RWSEM_UNLOCKED_VALUE 0L #define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) -#endif /* Common initializer macros and functions */ diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index fbba478ae5229..e335953fa7040 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -229,7 +229,7 @@ config MUTEX_SPIN_ON_OWNER config RWSEM_SPIN_ON_OWNER def_bool y - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW + depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW config LOCK_SPIN_ON_OWNER def_bool y diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 392c7f23af765..1af83e9ce57d5 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -3,7 +3,7 @@ # and is generally not a function of system call inputs. 
KCOV_INSTRUMENT := n -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o +obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) @@ -25,8 +25,6 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c deleted file mode 100644 index a7ffb2a96ede0..0000000000000 --- a/kernel/locking/rwsem-spinlock.c +++ /dev/null @@ -1,339 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* rwsem-spinlock.c: R/W semaphores: contention handling functions for - * generic spinlock implementation - * - * Copyright (c) 2001 David Howells (dhowells@redhat.com). - * - Derived partially from idea by Andrea Arcangeli - * - Derived also from comments by Linus - */ -#include -#include -#include -#include - -enum rwsem_waiter_type { - RWSEM_WAITING_FOR_WRITE, - RWSEM_WAITING_FOR_READ -}; - -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - enum rwsem_waiter_type type; -}; - -int rwsem_is_locked(struct rw_semaphore *sem) -{ - int ret = 1; - unsigned long flags; - - if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->count != 0); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - } - return ret; -} -EXPORT_SYMBOL(rwsem_is_locked); - -/* - * initialise the semaphore - */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held semaphore: - */ - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -#endif - sem->count = 0; - raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} -EXPORT_SYMBOL(__init_rwsem); - -/* - * handle the lock release when processes blocked on it that can now run - * - if we come here, then: - * - the 'active count' _reached_ zero - * - the 'waiting count' is non-zero - * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if wakewrite is non-zero - */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) -{ - struct rwsem_waiter *waiter; - struct task_struct *tsk; - int woken; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wakewrite) - /* Wake up a writer. Note that we do not grant it the - * lock - it will have to acquire it when it runs. */ - wake_up_process(waiter->task); - goto out; - } - - /* grant an infinite number of read locks to the front of the queue */ - woken = 0; - do { - struct list_head *next = waiter->list.next; - - list_del(&waiter->list); - tsk = waiter->task; - /* - * Make sure we do not wakeup the next reader before - * setting the nil condition to grant the next reader; - * otherwise we could miss the wakeup on the other - * side and end up sleeping again. See the pairing - * in rwsem_down_read_failed(). 
- */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - woken++; - if (next == &sem->wait_list) - break; - waiter = list_entry(next, struct rwsem_waiter, list); - } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - - sem->count += woken; - - out: - return sem; -} - -/* - * wake a single writer - */ -static inline struct rw_semaphore * -__rwsem_wake_one_writer(struct rw_semaphore *sem) -{ - struct rwsem_waiter *waiter; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - wake_up_process(waiter->task); - - return sem; -} - -/* - * get a read lock on the semaphore - */ -int __sched __down_read_common(struct rw_semaphore *sem, int state) -{ - struct rwsem_waiter waiter; - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->count >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->count++; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - /* set up my own style of waitqueue */ - waiter.task = current; - waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(current); - - list_add_tail(&waiter.list, &sem->wait_list); - - /* wait to be given the lock */ - for (;;) { - if (!waiter.task) - break; - if (signal_pending_state(state, current)) - goto out_nolock; - set_current_state(state); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - schedule(); - raw_spin_lock_irqsave(&sem->wait_lock, flags); - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - out: - return 0; - -out_nolock: - /* - * We didn't take the lock, so that there is a writer, which - * is owner or the first waiter of the sem. If it's a waiter, - * it will be woken by current owner. Not need to wake anybody. - */ - list_del(&waiter.list); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - return -EINTR; -} - -void __sched __down_read(struct rw_semaphore *sem) -{ - __down_read_common(sem, TASK_UNINTERRUPTIBLE); -} - -int __sched __down_read_killable(struct rw_semaphore *sem) -{ - return __down_read_common(sem, TASK_KILLABLE); -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -int __down_read_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->count >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->count++; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * get a write lock on the semaphore - */ -int __sched __down_write_common(struct rw_semaphore *sem, int state) -{ - struct rwsem_waiter waiter; - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* set up my own style of waitqueue */ - waiter.task = current; - waiter.type = RWSEM_WAITING_FOR_WRITE; - list_add_tail(&waiter.list, &sem->wait_list); - - /* wait for someone to release the lock */ - for (;;) { - /* - * That is the key to support write lock stealing: allows the - * task already on CPU to get the lock soon rather than put - * itself into sleep and waiting for system woke it or someone - * else in the head of the wait list up. 
- */ - if (sem->count == 0) - break; - if (signal_pending_state(state, current)) - goto out_nolock; - - set_current_state(state); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - schedule(); - raw_spin_lock_irqsave(&sem->wait_lock, flags); - } - /* got the lock */ - sem->count = -1; - list_del(&waiter.list); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; - -out_nolock: - list_del(&waiter.list); - if (!list_empty(&sem->wait_list) && sem->count >= 0) - __rwsem_do_wake(sem, 0); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return -EINTR; -} - -void __sched __down_write(struct rw_semaphore *sem) -{ - __down_write_common(sem, TASK_UNINTERRUPTIBLE); -} - -int __sched __down_write_killable(struct rw_semaphore *sem) -{ - return __down_write_common(sem, TASK_KILLABLE); -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -int __down_write_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->count == 0) { - /* got the lock */ - sem->count = -1; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * release a read lock on the semaphore - */ -void __up_read(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (--sem->count == 0 && !list_empty(&sem->wait_list)) - sem = __rwsem_wake_one_writer(sem); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * release a write lock on the semaphore - */ -void __up_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->count = 0; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * downgrade a write lock into a read lock - * - just wake up any readers at the front of the queue - */ -void __downgrade_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->count = 1; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 067e265fa5c1d..45ee00236e039 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -153,7 +153,6 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM /* * lock for reading */ @@ -260,5 +259,3 @@ static inline void __downgrade_write(struct rw_semaphore *sem) if (tmp < 0) rwsem_downgrade_wake(sem); } - -#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */ -- GitLab From ddb20d1d3aed8f130519c0a29cd5392efcc067b8 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:08 -0400 Subject: [PATCH 0579/1861] locking/rwsem: Optimize down_read_trylock() Modify __down_read_trylock() to optimize for an unlocked rwsem and make it generate slightly better code. 
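Before the generated code below, it may help to see the shape of the change as a minimal userspace sketch (not the kernel code itself): the same transformation expressed with C11 atomics instead of the kernel's atomic_long_* API, with invented demo_* names. atomic_compare_exchange_strong() writes the freshly observed value back into its "expected" argument on failure, which is what lets the new form drop the separate re-read of the counter.

    #include <stdatomic.h>
    #include <stdbool.h>

    #define DEMO_READ_BIAS 1L  /* stands in for RWSEM_ACTIVE_READ_BIAS */

    /* Old shape: read the counter first, then try to cmpxchg it. */
    static bool demo_read_trylock_old(atomic_long *count)
    {
            long tmp;

            while ((tmp = atomic_load(count)) >= 0) {
                    if (atomic_compare_exchange_strong(count, &tmp,
                                                       tmp + DEMO_READ_BIAS))
                            return true;
            }
            return false;
    }

    /* New shape: optimistically assume "unlocked" (0). A failed CAS
     * leaves the observed value in tmp, so no separate re-read. */
    static bool demo_read_trylock_new(atomic_long *count)
    {
            long tmp = 0;  /* stands in for RWSEM_UNLOCKED_VALUE */

            do {
                    if (atomic_compare_exchange_strong(count, &tmp,
                                                       tmp + DEMO_READ_BIAS))
                            return true;
            } while (tmp >= 0);
            return false;
    }

In the uncontended case the new form issues the cmpxchg immediately, with no prior load, which is exactly what the shorter disassembly below reflects.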
Before this patch, down_read_trylock:

   0x0000000000000000 <+0>:  callq  0x5
   0x0000000000000005 <+5>:  jmp    0x18
   0x0000000000000007 <+7>:  lea    0x1(%rdx),%rcx
   0x000000000000000b <+11>: mov    %rdx,%rax
   0x000000000000000e <+14>: lock cmpxchg %rcx,(%rdi)
   0x0000000000000013 <+19>: cmp    %rax,%rdx
   0x0000000000000016 <+22>: je     0x23
   0x0000000000000018 <+24>: mov    (%rdi),%rdx
   0x000000000000001b <+27>: test   %rdx,%rdx
   0x000000000000001e <+30>: jns    0x7
   0x0000000000000020 <+32>: xor    %eax,%eax
   0x0000000000000022 <+34>: retq
   0x0000000000000023 <+35>: mov    %gs:0x0,%rax
   0x000000000000002c <+44>: or     $0x3,%rax
   0x0000000000000030 <+48>: mov    %rax,0x20(%rdi)
   0x0000000000000034 <+52>: mov    $0x1,%eax
   0x0000000000000039 <+57>: retq

After patch, down_read_trylock:

   0x0000000000000000 <+0>:  callq  0x5
   0x0000000000000005 <+5>:  xor    %eax,%eax
   0x0000000000000007 <+7>:  lea    0x1(%rax),%rdx
   0x000000000000000b <+11>: lock cmpxchg %rdx,(%rdi)
   0x0000000000000010 <+16>: jne    0x29
   0x0000000000000012 <+18>: mov    %gs:0x0,%rax
   0x000000000000001b <+27>: or     $0x3,%rax
   0x000000000000001f <+31>: mov    %rax,0x20(%rdi)
   0x0000000000000023 <+35>: mov    $0x1,%eax
   0x0000000000000028 <+40>: retq
   0x0000000000000029 <+41>: test   %rax,%rax
   0x000000000000002c <+44>: jns    0x7
   0x000000000000002e <+46>: xor    %eax,%eax
   0x0000000000000030 <+48>: retq

Using a rwsem microbenchmark, the down_read_trylock() rates (with a load of
10 to lengthen the lock critical section) on an x86-64 system before and
after the patch were:

                  Before Patch    After Patch
   # of Threads      rlock           rlock
   ------------      -----           -----
        1            14,496          14,716
        2             8,644           8,453
        4             6,799           6,983
        8             5,664           7,190

On an ARM64 system, the performance results were:

                  Before Patch    After Patch
   # of Threads      rlock           rlock
   ------------      -----           -----
        1            23,676          24,488
        2             7,697           9,502
        4             4,945           3,440
        8             2,641           1,603

For the uncontended case (1 thread), the new down_read_trylock() is a
little bit faster. For the contended cases, the new down_read_trylock()
performs pretty well on x86-64, but performance degrades at high contention
levels on ARM64.

Suggested-by: Linus Torvalds
Signed-off-by: Waiman Long
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Cc: Andrew Morton
Cc: Arnd Bergmann
Cc: Borislav Petkov
Cc: Davidlohr Bueso
Cc: H. Peter Anvin
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Tim Chen
Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: linux-m68k@lists.linux-m68k.org
Cc: linux-riscv@lists.infradead.org
Cc: linux-um@lists.infradead.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: nios2-dev@lists.rocketboards.org
Cc: openrisc@lists.librecores.org
Cc: uclinux-h8-devel@lists.sourceforge.jp
Link: https://lkml.kernel.org/r/20190322143008.21313-4-longman@redhat.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/rwsem.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 45ee00236e039..1f5775aa6a1df 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -174,14 +174,17 @@ static inline int __down_read_killable(struct rw_semaphore *sem)
 
 static inline int __down_read_trylock(struct rw_semaphore *sem)
 {
-	long tmp;
+	/*
+	 * Optimize for the case when the rwsem is not locked at all.
+ */ + long tmp = RWSEM_UNLOCKED_VALUE; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { + do { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } - } + } while (tmp >= 0); return 0; } -- GitLab From a6cbfbe6677efb5ca47bb7958c2718236c25126e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Mar 2019 22:46:52 +0100 Subject: [PATCH 0580/1861] x86/uaccess: Fix implicit cast of __user pointer The first two arguments of __user_atomic_cmpxchg_inatomic() are: - @uval is a kernel pointer into which the old value should be stored - @ptr is the user pointer on which the cmpxchg should operate This means that casting @uval to __typeof__(ptr) is wrong. Since @uval is only used once inside the macro, just get rid of __uval and use (uval) directly. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Mukesh Ojha Cc: Andrew Morton Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mike Rapoport Cc: Qiaowei Ren Cc: Thomas Gleixner Cc: Will Deacon Cc: x86-ml Link: https://lkml.kernel.org/r/20190329214652.258477-4-jannh@google.com --- arch/x86/include/asm/uaccess.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1954dd5552a2e..a21f2a2f17bf7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -585,7 +585,6 @@ extern void __cmpxchg_wrong_size(void) #define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \ ({ \ int __ret = 0; \ - __typeof__(ptr) __uval = (uval); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ __uaccess_begin_nospec(); \ @@ -661,7 +660,7 @@ extern void __cmpxchg_wrong_size(void) __cmpxchg_wrong_size(); \ } \ __uaccess_end(); \ - *__uval = __old; \ + *(uval) = __old; \ __ret; \ }) -- GitLab From 2c3af7d901c61c101c02f431cfb520af9ff56ab4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 1 Apr 2019 13:57:30 -0700 Subject: [PATCH 0581/1861] selftests/bpf: fix vlan handling in flow dissector program When we tail call PROG(VLAN) from parse_eth_proto we don't need to peek back to handle vlan proto because we didn't adjust nhoff/thoff yet. Use flow_keys->n_proto, that we set in parse_eth_proto instead and properly increment nhoff as well. Also, always use skb->protocol and don't look at skb->vlan_present. skb->vlan_present indicates that vlan information is stored out-of-band in skb->vlan_{tci,proto} and vlan header is already pulled from skb. That means, skb->vlan_present == true is not relevant for BPF flow dissector. 
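Before the test cases below, it may help to picture what the dissector actually sees in-band. This is an illustrative sketch (demo_* names are invented, not from the patch) of why nhoff moves by 4 bytes per tag, which is exactly what the new test vectors encode:

    #include <stdint.h>
    #include <stdio.h>

    /* Invented stand-in for struct vlan_hdr: 4 bytes per tag on the wire. */
    struct demo_vlan_hdr {
            uint16_t h_vlan_TCI;                /* priority + VLAN id */
            uint16_t h_vlan_encapsulated_proto; /* next ethertype, e.g. IP */
    };

    #define DEMO_VLAN_HLEN sizeof(struct demo_vlan_hdr)

    /* nhoff of the L3 header, relative to the end of the Ethernet header,
     * for 0, 1 (802.1Q) or 2 (802.1AD + 802.1Q) tags. */
    static unsigned long demo_l3_nhoff(unsigned int num_tags)
    {
            return num_tags * DEMO_VLAN_HLEN;
    }

    int main(void)
    {
            printf("single tag: nhoff = %lu\n", demo_l3_nhoff(1)); /* 4 */
            printf("double tag: nhoff = %lu\n", demo_l3_nhoff(2)); /* 8 */
            return 0;
    }

These offsets match the .nhoff values (VLAN_HLEN and VLAN_HLEN * 2) in the flow_keys the tests below expect.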
Add simple test cases with VLAN tagged frames: * single vlan for ipv4 * double vlan for ipv6 Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- .../selftests/bpf/prog_tests/flow_dissector.c | 68 +++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_flow.c | 15 ++-- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index bcbd928c96aba..fc818bc1d7294 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -39,6 +39,58 @@ static struct bpf_flow_keys pkt_v6_flow_keys = { .n_proto = __bpf_constant_htons(ETH_P_IPV6), }; +#define VLAN_HLEN 4 + +static struct { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + struct iphdr iph; + struct tcphdr tcp; +} __packed pkt_vlan_v4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +static struct bpf_flow_keys pkt_vlan_v4_flow_keys = { + .nhoff = VLAN_HLEN, + .thoff = VLAN_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), +}; + +static struct { + struct ethhdr eth; + __u16 vlan_tci; + __u16 vlan_proto; + __u16 vlan_tci2; + __u16 vlan_proto2; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed pkt_vlan_v6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD), + .vlan_proto = __bpf_constant_htons(ETH_P_8021Q), + .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, + .tcp.doff = 5, +}; + +static struct bpf_flow_keys pkt_vlan_v6_flow_keys = { + .nhoff = VLAN_HLEN * 2, + .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), +}; + void test_flow_dissector(void) { struct bpf_flow_keys flow_keys; @@ -68,5 +120,21 @@ void test_flow_dissector(void) err, errno, retval, duration, size, sizeof(flow_keys)); CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys); + err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys, + pkt_vlan_v4_flow_keys); + + err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys, + pkt_vlan_v6_flow_keys); + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 284660f5aa953..f177c7a6a6c79 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -119,10 +119,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { - if 
(!skb->vlan_present)
-		return parse_eth_proto(skb, skb->protocol);
-	else
-		return parse_eth_proto(skb, skb->vlan_proto);
+	return parse_eth_proto(skb, skb->protocol);
 }
 
 /* Parses on IPPROTO_* */
@@ -336,15 +333,9 @@ PROG(VLAN)(struct __sk_buff *skb)
 {
 	struct bpf_flow_keys *keys = skb->flow_keys;
 	struct vlan_hdr *vlan, _vlan;
-	__be16 proto;
-
-	/* Peek back to see if single or double-tagging */
-	if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
-			       sizeof(proto)))
-		return BPF_DROP;
 
 	/* Account for double-tagging */
-	if (proto == bpf_htons(ETH_P_8021AD)) {
+	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
 		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
 		if (!vlan)
 			return BPF_DROP;
@@ -352,6 +343,7 @@ PROG(VLAN)(struct __sk_buff *skb)
 		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
 			return BPF_DROP;
 
+		keys->nhoff += sizeof(*vlan);
 		keys->thoff += sizeof(*vlan);
 	}
 
@@ -359,6 +351,7 @@ PROG(VLAN)(struct __sk_buff *skb)
 	if (!vlan)
 		return BPF_DROP;
 
+	keys->nhoff += sizeof(*vlan);
 	keys->thoff += sizeof(*vlan);
 
 	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
 	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
--
GitLab

From 822fe61795018265ae14731d4e5399e5bde36864 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev
Date: Mon, 1 Apr 2019 13:57:31 -0700
Subject: [PATCH 0582/1861] net/flow_dissector: pass flow_keys->n_proto to BPF
 programs

This is a preparation for the next commit, which would prohibit access to
most of the __sk_buff fields from BPF programs. Instead of requiring BPF
flow dissector programs to look into the skb, pass all input data in the
flow_keys.

Signed-off-by: Stanislav Fomichev
Signed-off-by: Daniel Borkmann
---
 net/core/flow_dissector.c                    | 1 +
 tools/testing/selftests/bpf/progs/bpf_flow.c | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index bb1a54747d648..9b84250039df1 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -707,6 +707,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 	/* Pass parameters to the BPF program */
 	memset(flow_keys, 0, sizeof(*flow_keys));
 	cb->qdisc_cb.flow_keys = flow_keys;
+	flow_keys->n_proto = skb->protocol;
 	flow_keys->nhoff = skb_network_offset(skb);
 	flow_keys->thoff = flow_keys->nhoff;
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index f177c7a6a6c79..75b17cada5393 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -92,7 +92,6 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 {
 	struct bpf_flow_keys *keys = skb->flow_keys;
 
-	keys->n_proto = proto;
 	switch (proto) {
 	case bpf_htons(ETH_P_IP):
 		bpf_tail_call(skb, &jmp_table, IP);
@@ -119,7 +118,9 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 SEC("flow_dissector")
 int _dissect(struct __sk_buff *skb)
 {
-	return parse_eth_proto(skb, skb->protocol);
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	return parse_eth_proto(skb, keys->n_proto);
 }
 
 /* Parses on IPPROTO_* */
@@ -358,6 +359,7 @@ PROG(VLAN)(struct __sk_buff *skb)
 	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
 		return BPF_DROP;
 
+	keys->n_proto = vlan->h_vlan_encapsulated_proto;
 	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
 }
 
--
GitLab

From b9e9c8599f0f23e3d2051befc9966a84b639f64f Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev
Date: Mon, 1 Apr 2019 13:57:32 -0700
Subject: [PATCH 0583/1861] flow_dissector: fix clamping of BPF flow_keys for
 non-zero nhoff

Don't allow a BPF program to set flow_keys->nhoff to less than the initial
value. We currently don't read the value afterwards in anything but the
tests, but it's still good practice to return consistent values to the
test programs.

Signed-off-by: Stanislav Fomichev
Signed-off-by: Daniel Borkmann
---
 net/core/flow_dissector.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 9b84250039df1..94a450b2191a9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -717,7 +717,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
 
 	/* Restore state */
 	memcpy(cb, &cb_saved, sizeof(cb_saved));
 
-	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
+	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff,
+				   skb_network_offset(skb), skb->len);
 	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
 				   flow_keys->nhoff, skb->len);
 
--
GitLab

From 2ee7fba0d62d638d8b6dbe30cada3a531ec042af Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev
Date: Mon, 1 Apr 2019 13:57:33 -0700
Subject: [PATCH 0584/1861] flow_dissector: allow access only to a subset of
 __sk_buff fields

Use a whitelist instead of a blacklist and allow only a small set of
fields that might be relevant in the context of flow dissector:
 * data
 * data_end
 * flow_keys

This is required for the eth_get_headlen case where we have only a
chunk of data to dissect (i.e. trying to read the other skb fields
doesn't make sense).

Note that this is a breaking API change! However, we've provided
flow_keys->n_proto as a substitute for skb->protocol, and there is no
need to manually handle skb->vlan_present. So even if we break somebody,
the migration is trivial. Unfortunately, we can't support the
eth_get_headlen use-case without those breaking changes.

Signed-off-by: Stanislav Fomichev
Signed-off-by: Daniel Borkmann
---
 net/core/filter.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 647c63a7b25b6..fc92ebc4e200c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6613,14 +6613,8 @@ static bool flow_dissector_is_valid_access(int off, int size,
 					   const struct bpf_prog *prog,
 					   struct bpf_insn_access_aux *info)
 {
-	if (type == BPF_WRITE) {
-		switch (off) {
-		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
-			break;
-		default:
-			return false;
-		}
-	}
+	if (type == BPF_WRITE)
+		return false;
 
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, data):
@@ -6632,11 +6626,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 		info->reg_type = PTR_TO_FLOW_KEYS;
 		break;
-	case bpf_ctx_range(struct __sk_buff, tc_classid):
-	case bpf_ctx_range(struct __sk_buff, data_meta):
-	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
-	case bpf_ctx_range(struct __sk_buff, tstamp):
-	case bpf_ctx_range(struct __sk_buff, wire_len):
+	default:
 		return false;
 	}
 
--
GitLab

From ae82899bbe92a7777ded9a562ee602dd5917bcd8 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev
Date: Mon, 1 Apr 2019 13:57:34 -0700
Subject: [PATCH 0585/1861] flow_dissector: document BPF flow dissector
 environment

Short doc on what a BPF flow dissector should expect in the input
__sk_buff and flow_keys.

Signed-off-by: Stanislav Fomichev
Signed-off-by: Daniel Borkmann
---
 .../networking/bpf_flow_dissector.txt         | 115 ++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 Documentation/networking/bpf_flow_dissector.txt

diff --git a/Documentation/networking/bpf_flow_dissector.txt b/Documentation/networking/bpf_flow_dissector.txt
new file mode 100644
index 0000000000000..70f66a2d57e7b
--- /dev/null
+++ b/Documentation/networking/bpf_flow_dissector.txt
@@ -0,0 +1,115 @@
+==================
+BPF Flow Dissector
+==================
+
+Overview
+========
+
+Flow dissector is a routine that parses metadata out of the packets. It's
+used in various places in the networking subsystem (RFS, flow hash, etc).
+
+BPF flow dissector is an attempt to reimplement C-based flow dissector logic
+in BPF to gain all the benefits of BPF verifier (namely, limits on the
+number of instructions and tail calls).
+
+API
+===
+
+BPF flow dissector programs operate on an __sk_buff. However, only a
+limited set of fields is allowed: data, data_end and flow_keys. flow_keys
+is 'struct bpf_flow_keys' and contains flow dissector input and
+output arguments.
+
+The inputs are:
+ * nhoff - initial offset of the networking header
+ * thoff - initial offset of the transport header, initialized to nhoff
+ * n_proto - L3 protocol type, parsed out of L2 header
+
+The flow dissector BPF program should fill out the rest of the 'struct
+bpf_flow_keys' fields. The input arguments nhoff/thoff/n_proto should also
+be adjusted accordingly.
+
+The return code of the BPF program is either BPF_OK to indicate successful
+dissection, or BPF_DROP to indicate a parsing error.
+
+__sk_buff->data
+===============
+
+In the VLAN-less case, this is what the initial state of the BPF flow
+dissector looks like:
++------+------+------------+-----------+
+| DMAC | SMAC | ETHER_TYPE | L3_HEADER |
++------+------+------------+-----------+
+                            ^
+                            |
+                            +-- flow dissector starts here
+
+skb->data + flow_keys->nhoff point to the first byte of L3_HEADER.
+flow_keys->thoff = nhoff
+flow_keys->n_proto = ETHER_TYPE
+
+
+In case of VLAN, the flow dissector can be called with two different states.
+
+Pre-VLAN parsing:
++------+------+------+-----+-----------+-----------+
+| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
++------+------+------+-----+-----------+-----------+
+                      ^
+                      |
+                      +-- flow dissector starts here
+
+skb->data + flow_keys->nhoff point to the first byte of TCI.
+flow_keys->thoff = nhoff
+flow_keys->n_proto = TPID
+
+Please note that TPID can be 802.1AD and, hence, the BPF program would
+have to parse VLAN information twice for double tagged packets.
+
+
+Post-VLAN parsing:
++------+------+------+-----+-----------+-----------+
+| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
++------+------+------+-----+-----------+-----------+
+                                        ^
+                                        |
+                                        +-- flow dissector starts here
+
+skb->data + flow_keys->nhoff point to the first byte of L3_HEADER.
+flow_keys->thoff = nhoff
+flow_keys->n_proto = ETHER_TYPE
+
+In this case VLAN information has been processed before the flow dissector
+and the BPF flow dissector is not required to handle it.
+
+
+The takeaway here is as follows: a BPF flow dissector program can be called
+with an optional VLAN header and should gracefully handle both cases: when
+a single or double VLAN is present and when it is not present. The same
+program is called for both cases and has to be written carefully to handle
+both.
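As an editorial illustration of the contract described above (not part of the patch or of the reference implementation that the next section points to), a dissector that only accepts plain IPv4/TCP could look roughly like this. Includes are elided, demo_dissect is an invented name, and the fixed 20-byte IPv4 header and TCP next-protocol are assumptions; a real program must parse iph->ihl and iph->protocol:

    SEC("flow_dissector")
    int demo_dissect(struct __sk_buff *skb)
    {
            struct bpf_flow_keys *keys = skb->flow_keys;

            /* n_proto was parsed out of the L2 header for us (see above). */
            if (keys->n_proto != bpf_htons(ETH_P_IP))
                    return BPF_DROP;        /* unsupported L3 protocol */

            /* skb->data + keys->nhoff is the first byte of the IPv4 header. */
            keys->addr_proto = ETH_P_IP;
            keys->ip_proto = IPPROTO_TCP;   /* assumed, not parsed */
            keys->thoff = keys->nhoff + sizeof(struct iphdr);

            return BPF_OK;
    }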
+
+
+Reference Implementation
+========================
+
+See tools/testing/selftests/bpf/progs/bpf_flow.c for the reference
+implementation and tools/testing/selftests/bpf/flow_dissector_load.[hc] for
+the loader. bpftool can be used to load a BPF flow dissector program as
+well.
+
+The reference implementation is organized as follows:
+* jmp_table map that contains sub-programs for each supported L3 protocol
+* _dissect routine - entry point; it does input n_proto parsing and does
+  bpf_tail_call to the appropriate L3 handler
+
+Since BPF at this point doesn't support looping (or any jumping back),
+jmp_table is used instead to handle multiple levels of encapsulation (and
+IPv6 options).
+
+
+Current Limitations
+===================
+BPF flow dissector doesn't support exporting all the metadata that the
+in-kernel C-based implementation can export. A notable example is single
+VLAN (802.1Q) and double VLAN (802.1AD) tags. Please refer to
+'struct bpf_flow_keys' for the set of information that can currently be
+exported from the BPF context.
--
GitLab

From 25adf50fe25d506d3fc12070a5ff4be858a1ac1b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 3 Apr 2019 09:52:40 -0600
Subject: [PATCH 0586/1861] io_uring: fix double free in case of fileset
 registration failure

Will Deacon reported the following KASAN complaint:

[  149.890370] ==================================================================
[  149.891266] BUG: KASAN: double-free or invalid-free in io_sqe_files_unregister+0xa8/0x140
[  149.892218]
[  149.892411] CPU: 113 PID: 3974 Comm: io_uring_regist Tainted: G    B 5.1.0-rc3-00012-g40b114779944 #3
[  149.893623] Hardware name: linux,dummy-virt (DT)
[  149.894169] Call trace:
[  149.894539]  dump_backtrace+0x0/0x228
[  149.895172]  show_stack+0x14/0x20
[  149.895747]  dump_stack+0xe8/0x124
[  149.896335]  print_address_description+0x60/0x258
[  149.897148]  kasan_report_invalid_free+0x78/0xb8
[  149.897936]  __kasan_slab_free+0x1fc/0x228
[  149.898641]  kasan_slab_free+0x10/0x18
[  149.899283]  kfree+0x70/0x1f8
[  149.899798]  io_sqe_files_unregister+0xa8/0x140
[  149.900574]  io_ring_ctx_wait_and_kill+0x190/0x3c0
[  149.901402]  io_uring_release+0x2c/0x48
[  149.902068]  __fput+0x18c/0x510
[  149.902612]  ____fput+0xc/0x18
[  149.903146]  task_work_run+0xf0/0x148
[  149.903778]  do_notify_resume+0x554/0x748
[  149.904467]  work_pending+0x8/0x10
[  149.905060]
[  149.905331] Allocated by task 3974:
[  149.905934]  __kasan_kmalloc.isra.0.part.1+0x48/0xf8
[  149.906786]  __kasan_kmalloc.isra.0+0xb8/0xd8
[  149.907531]  kasan_kmalloc+0xc/0x18
[  149.908134]  __kmalloc+0x168/0x248
[  149.908724]  __arm64_sys_io_uring_register+0x2b8/0x15a8
[  149.909622]  el0_svc_common+0x100/0x258
[  149.910281]  el0_svc_handler+0x48/0xc0
[  149.910928]  el0_svc+0x8/0xc
[  149.911425]
[  149.911696] Freed by task 3974:
[  149.912242]  __kasan_slab_free+0x114/0x228
[  149.912955]  kasan_slab_free+0x10/0x18
[  149.913602]  kfree+0x70/0x1f8
[  149.914118]  __arm64_sys_io_uring_register+0xc2c/0x15a8
[  149.915009]  el0_svc_common+0x100/0x258
[  149.915670]  el0_svc_handler+0x48/0xc0
[  149.916317]  el0_svc+0x8/0xc
[  149.916817]
[  149.917101] The buggy address belongs to the object at ffff8004ce07ed00
[  149.917101]  which belongs to the cache kmalloc-128 of size 128
[  149.919197] The buggy address is located 0 bytes inside of
[  149.919197]  128-byte region [ffff8004ce07ed00, ffff8004ce07ed80)
[  149.921142] The buggy address belongs to the page:
[  149.921953] page:ffff7e0013381f00 count:1 mapcount:0 mapping:ffff800503417c00 index:0x0 compound_mapcount: 0
[  149.923595] flags: 0x1ffff00000010200(slab|head)
[  149.924388] raw: 1ffff00000010200 dead000000000100 dead000000000200 ffff800503417c00
[  149.925706] raw: 0000000000000000 0000000080400040 00000001ffffffff 0000000000000000
[  149.927011] page dumped because: kasan: bad access detected
[  149.927956]
[  149.928224] Memory state around the buggy address:
[  149.929054]  ffff8004ce07ec00: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc
[  149.930274]  ffff8004ce07ec80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  149.931494] >ffff8004ce07ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  149.932712]                    ^
[  149.933281]  ffff8004ce07ed80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  149.934508]  ffff8004ce07ee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  149.935725] ==================================================================

which is due to a failure in registering a fileset. This frees the
ctx->user_files pointer, but doesn't clear it. When the io_uring instance
is later freed through the normal channels, we free this pointer again.
At this point it's invalid. Ensure we clear the pointer when we free it
for the error case.

Reported-by: Will Deacon
Tested-by: Will Deacon
Signed-off-by: Jens Axboe
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bbdbd56cf2ac9..07d6ef195d05a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2215,6 +2215,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		fput(ctx->user_files[i]);
 
 	kfree(ctx->user_files);
+	ctx->user_files = NULL;
 	ctx->nr_user_files = 0;
 	return ret;
 }
--
GitLab

From 58ccd2d31e502c37e108b285bf3d343eb00c235b Mon Sep 17 00:00:00 2001
From: YueHaibing
Date: Wed, 3 Apr 2019 11:37:07 +0800
Subject: [PATCH 0587/1861] paride/pf: Fix potential NULL pointer dereference

Syzkaller reported this:

pf: pf version 1.04, major 47, cluster 64, nice 0
pf: No ATAPI disk detected
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN PTI
CPU: 0 PID: 9887 Comm: syz-executor.0 Tainted: G         C 5.1.0-rc3+ #8
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
RIP: 0010:pf_init+0x7af/0x1000 [pf]
Code: 46 77 d2 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 03 25 a6 d2 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 e6 24 a6 d2 49 8b bc 24 80 05 00 00 e8 79 34
RSP: 0018:ffff8881abcbf998 EFLAGS: 00010202
RAX: 00000000000000b0 RBX: ffffffffc1e4a8a8 RCX: ffffffffaec50788
RDX: 0000000000039b10 RSI: ffffc9000153c000 RDI: 0000000000000580
RBP: dffffc0000000000 R08: ffffed103ee44e59 R09: ffffed103ee44e59
R10: 0000000000000001 R11: ffffed103ee44e58 R12: 0000000000000000
R13: ffffffffc1e4b028 R14: 0000000000000000 R15: 0000000000000020
FS:  00007f1b78a91700(0000) GS:ffff8881f7200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f6d72b207f8 CR3: 00000001d5790004 CR4: 00000000007606f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 ? 0xffffffffc1e50000
 do_one_initcall+0xbc/0x47d init/main.c:901
 do_init_module+0x1b5/0x547 kernel/module.c:3456
 load_module+0x6405/0x8c10 kernel/module.c:3804
 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898
 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462e99
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f1b78a90c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99
RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003
RBP: 00007f1b78a90c70 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1b78a916bc
R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004
Modules linked in: pf(+) paride gpio_tps65218 tps65218 i2c_cht_wc ati_remote
 dc395x act_meta_skbtcindex act_ife ife ecdh_generic rc_xbox_dvd
 sky81452_regulator v4l2_fwnode leds_blinkm snd_usb_hiface comedi(C) aes_ti
 slhc cfi_cmdset_0020 mtd cfi_util sx8654 mdio_gpio of_mdio fixed_phy
 mdio_bitbang libphy alcor_pci matrix_keymap hid_uclogic usbhid
 scsi_transport_fc videobuf2_v4l2 videobuf2_dma_sg snd_soc_pcm179x_spi
 snd_soc_pcm179x_codec i2c_demux_pinctrl mdev snd_indigodj isl6405 mii
 enc28j60 cmac adt7316_i2c(C) adt7316(C) fmc_trivial fmc nf_reject_ipv4
 authenc rc_dtt200u rtc_ds1672 dvb_usb_dibusb_mc dvb_usb_dibusb_mc_common
 dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb dvb_core
 videobuf2_common videobuf2_vmalloc videobuf2_memops regulator_haptic
 adf7242 mac802154 ieee802154 s5h1409 da9034_ts snd_intel8x0m wmi cx24120
 usbcore sdhci_cadence sdhci_pltfm sdhci mmc_core joydev i2c_algo_bit
 scsi_transport_iscsi iscsi_boot_sysfs ves1820 lockd grace nfs_acl
 auth_rpcgss sunrpc ip_vs snd_soc_adau7002 snd_cs4281 snd_rawmidi gameport
 snd_opl3_lib snd_seq_device snd_hwdep snd_ac97_codec ad7418 hid_primax hid
 snd_soc_cs4265 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus
 snd_compress snd soundcore ti_adc108s102 eeprom_93cx6 i2c_algo_pca
 mlxreg_hotplug st_pressure st_sensors industrialio_triggered_buffer
 kfifo_buf industrialio v4l2_common videodev media snd_soc_adau_utils
 rc_pinnacle_grey rc_core pps_gpio leds_lm3692x nandcore ledtrig_pattern
 iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack
 nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre
 ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211
 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre
 gre ip6_tunnel tunnel6 tun mousedev ppdev tpm kvm_intel kvm irqbypass
 crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel
 ide_pci_generic aes_x86_64 piix crypto_simd input_leds psmouse cryptd
 glue_helper ide_core intel_agp serio_raw intel_gtt agpgart ata_generic
 i2c_piix4 pata_acpi parport_pc parport rtc_cmos floppy sch_fq_codel
 ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: paride]
Dumping ftrace buffer:
   (ftrace buffer empty)
---[ end trace 7a818cf5f210d79e ]---

If alloc_disk fails in pf_init_units, pf->disk will be NULL; however,
pf_detect and pf_exit do not check for this before freeing, which may
result in a NULL pointer dereference. Also, when register_blkdev fails,
blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free
resources.

Reported-by: Hulk Robot
Fixes: 6ce59025f118 ("paride/pf: cleanup queues when detection fails")
Signed-off-by: YueHaibing
Signed-off-by: Jens Axboe
---
 drivers/block/paride/pf.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 103b617cdc318..35e6e271b219c 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -762,6 +762,8 @@ static int pf_detect(void)
 
 	printk("%s: No ATAPI disk detected\n", name);
 	for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+		if (!pf->disk)
+			continue;
 		blk_cleanup_queue(pf->disk->queue);
 		pf->disk->queue = NULL;
 		blk_mq_free_tag_set(&pf->tag_set);
@@ -1029,8 +1031,13 @@ static int __init pf_init(void)
 	pf_busy = 0;
 
 	if (register_blkdev(major, name)) {
-		for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+		for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+			if (!pf->disk)
+				continue;
+			blk_cleanup_queue(pf->disk->queue);
+			blk_mq_free_tag_set(&pf->tag_set);
 			put_disk(pf->disk);
+		}
 		return -EBUSY;
 	}
@@ -1051,6 +1058,9 @@ static void __exit pf_exit(void)
 	int unit;
 	unregister_blkdev(major, name);
 	for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+		if (!pf->disk)
+			continue;
+
 		if (pf->present)
 			del_gendisk(pf->disk);
--
GitLab

From 18bfb9c6a8a5fbfe1a732130bb8f65fcfc4e8aa2 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 3 Apr 2019 09:22:35 +0300
Subject: [PATCH 0588/1861] aio: Fix an error code in __io_submit_one()

This accidentally returns the wrong variable. The "req->ki_eventfd"
pointer is NULL, so this returns success.

Fixes: 7316b49c2a11 ("aio: move sanity checks and request allocation to io_submit_one()")
Signed-off-by: Dan Carpenter
Signed-off-by: Al Viro
---
 fs/aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/aio.c b/fs/aio.c
index a4cc2a1cccb75..7ccecaab487a1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1794,7 +1794,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
 		 */
 		eventfd = eventfd_ctx_fdget(iocb->aio_resfd);
 		if (IS_ERR(eventfd))
-			return PTR_ERR(req->ki_eventfd);
+			return PTR_ERR(eventfd);
 
 		req->ki_eventfd = eventfd;
 	}
--
GitLab

From 70b3d237bd7f52d3322c750a358581bf5a267699 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 3 Apr 2019 17:58:39 +0100
Subject: [PATCH 0589/1861] arm64: mm: Ensure we ignore the initrd if it is
 placed out of range

If the initrd payload isn't completely accessible via the linear map,
then we print a warning during boot and nobble the virtual address of
the payload so that we ignore it later on.

Unfortunately, since commit c756c592e442 ("arm64: Utilize
phys_initrd_start/phys_initrd_size"), the virtual address isn't
initialised until later anyway, so we need to nobble the size of the
payload to ensure that we don't try to use it later on.
Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size") Reported-by: Pierre Kuo Acked-by: Florian Fainelli Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 43fa75601b3d9..03a8a6888ec04 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -377,7 +377,7 @@ void __init arm64_memblock_init(void) base + size > memblock_start_of_DRAM() + linear_region_size, "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) { - initrd_start = 0; + phys_initrd_size = 0; } else { memblock_remove(base, size); /* clear MEMBLOCK_ flags */ memblock_add(base, size); -- GitLab From 697e96ed1720d6aad196e7a663d545df2edd44a4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 3 Apr 2019 17:48:22 +0900 Subject: [PATCH 0590/1861] arm64: vdso: fix and clean-up Makefile - $(call if_changed,...) must have FORCE as a prerequisite - vdso.lds is a generated file, so it should be prefixed with $(obj)/ instead of $(src)/. - cmd_vdsosym is a one-liner rule, so the assignment with '=' is simpler. Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b215c712d8970..a0af6bf6c11bf 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -31,7 +31,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH) $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first -$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) # Strip rule for the .so file @@ -42,9 +42,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Generate VDSO offsets using helper script gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ -define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ -endef + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) @@ -55,7 +53,7 @@ $(obj-vdso): %.o: %.S FORCE # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< -- GitLab From 426b046b748d1f47e096e05bdcc6fb4172791307 Mon Sep 17 00:00:00 2001 From: Louis Taylor Date: Wed, 3 Apr 2019 12:36:20 -0600 Subject: [PATCH 0591/1861] vfio/pci: use correct format characters When compiling with -Wformat, clang emits the following warnings: drivers/vfio/pci/vfio_pci.c:1601:5: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~ drivers/vfio/pci/vfio_pci.c:1601:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1601:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] vendor, device, subvendor, subdevice, ^~~~~~~~~ drivers/vfio/pci/vfio_pci.c:1605:5: 
warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat]
                vendor, device, subvendor, subdevice,
                ^~~~~~
drivers/vfio/pci/vfio_pci.c:1605:13: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat]
                vendor, device, subvendor, subdevice,
                        ^~~~~~
drivers/vfio/pci/vfio_pci.c:1605:21: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat]
                vendor, device, subvendor, subdevice,
                                ^~~~~~~~~
drivers/vfio/pci/vfio_pci.c:1605:32: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat]
                vendor, device, subvendor, subdevice,
                                           ^~~~~~~~~

The types of these arguments are unconditionally defined, so this patch
updates the format characters to the correct ones for unsigned ints.

Link: https://github.com/ClangBuiltLinux/linux/issues/378
Signed-off-by: Louis Taylor
Reviewed-by: Nick Desaulniers
Signed-off-by: Alex Williamson
---
 drivers/vfio/pci/vfio_pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a25659b5a5d17..3fa20e95a6bb6 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1661,11 +1661,11 @@ static void __init vfio_pci_fill_ids(void)
 		rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
 				   subvendor, subdevice, class, class_mask, 0);
 		if (rc)
-			pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
+			pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
 				vendor, device, subvendor, subdevice,
 				class, class_mask, rc);
 		else
-			pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
+			pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
 				vendor, device, subvendor, subdevice,
 				class, class_mask);
 	}
--
GitLab

From e39dd513d5f2ae2041c593d42fd0d8b24e7e950b Mon Sep 17 00:00:00 2001
From: Wang Hai
Date: Wed, 3 Apr 2019 12:36:21 -0600
Subject: [PATCH 0592/1861] vfio/spapr_tce: Make symbol 'tce_iommu_driver_ops'
 static

Fixes the following sparse warning:

drivers/vfio/vfio_iommu_spapr_tce.c:1401:36: warning:
 symbol 'tce_iommu_driver_ops' was not declared. Should it be static?

Fixes: 5ffd229c0273 ("powerpc/vfio: Implement IOMMU driver for VFIO")
Signed-off-by: Wang Hai
Reviewed-by: Alexey Kardashevskiy
Signed-off-by: Alex Williamson
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 8dbb270998f47..6b64e45a52691 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1398,7 +1398,7 @@ unlock_exit:
 	mutex_unlock(&container->lock);
 }
 
-const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
 	.name		= "iommu-vfio-powerpc",
 	.owner		= THIS_MODULE,
 	.open		= tce_iommu_open,
--
GitLab

From 492855939bdb59c6f947b0b5b44af9ad82b7e38c Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Wed, 3 Apr 2019 12:36:21 -0600
Subject: [PATCH 0593/1861] vfio/type1: Limit DMA mappings per container

Memory backed DMA mappings are accounted against a user's locked
memory limit, including multiple mappings of the same memory. This
accounting bounds the number of such mappings that a user can create.
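For reference, this is the operation being accounted, seen from userspace; a hedged sketch assuming an already opened and configured VFIO container file descriptor (group/container setup and error handling omitted, demo_map is an invented name). Each call, even one remapping the same buffer at a new iova, pins the pages again and is charged against RLIMIT_MEMLOCK. The commit message continues below with the case this accounting misses.

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    static int demo_map(int container_fd, void *buf, unsigned long long len,
                        unsigned long long iova)
    {
            struct vfio_iommu_type1_dma_map map;

            memset(&map, 0, sizeof(map));
            map.argsz = sizeof(map);
            map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
            map.vaddr = (unsigned long long)buf;
            map.iova  = iova;   /* each distinct iova is a separate mapping */
            map.size  = len;

            /* with this patch, returns -1/ENOSPC once the per-container
             * mapping budget (dma_entry_limit) is exhausted */
            return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
    }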
However, DMA mappings that are not backed by memory, such as DMA
mappings of device MMIO via mmaps, do not make use of page pinning and
therefore do not count against the user's locked memory limit. These
mappings still consume memory, but the memory is not well associated
with the process for the purpose of oom killing a task.

To add bounding on this use case, we introduce a limit to the total
number of concurrent DMA mappings that a user is allowed to create.
This limit is exposed as a tunable module option where the default
value of 64K is expected to be well in excess of any reasonable use
case (a large virtual machine configuration would typically only make
use of tens of concurrent mappings).

This fixes CVE-2019-3882.

Reviewed-by: Eric Auger
Tested-by: Eric Auger
Reviewed-by: Peter Xu
Reviewed-by: Cornelia Huck
Signed-off-by: Alex Williamson
---
 drivers/vfio/vfio_iommu_type1.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 73652e21efec6..d0f731c9920a6 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -58,12 +58,18 @@ module_param_named(disable_hugepages,
 MODULE_PARM_DESC(disable_hugepages,
 		 "Disable VFIO IOMMU support for IOMMU hugepages.");
 
+static unsigned int dma_entry_limit __read_mostly = U16_MAX;
+module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
+MODULE_PARM_DESC(dma_entry_limit,
+		 "Maximum number of user DMA mappings per container (65535).");
+
 struct vfio_iommu {
 	struct list_head	domain_list;
 	struct vfio_domain	*external_domain; /* domain for external user */
 	struct mutex		lock;
 	struct rb_root		dma_list;
 	struct blocking_notifier_head notifier;
+	unsigned int		dma_avail;
 	bool			v2;
 	bool			nesting;
 };
@@ -836,6 +842,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 	vfio_unlink_dma(iommu, dma);
 	put_task_struct(dma->task);
 	kfree(dma);
+	iommu->dma_avail++;
 }
 
 static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
@@ -1081,12 +1088,18 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 		goto out_unlock;
 	}
 
+	if (!iommu->dma_avail) {
+		ret = -ENOSPC;
+		goto out_unlock;
+	}
+
 	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
 	if (!dma) {
 		ret = -ENOMEM;
 		goto out_unlock;
 	}
 
+	iommu->dma_avail--;
 	dma->iova = iova;
 	dma->vaddr = vaddr;
 	dma->prot = prot;
@@ -1583,6 +1596,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 
 	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
+	iommu->dma_avail = dma_entry_limit;
 	mutex_init(&iommu->lock);
 	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
--
GitLab

From bf2a7ca39fd3ab47ef71c621a7ee69d1813b1f97 Mon Sep 17 00:00:00 2001
From: Anson Huang
Date: Wed, 3 Apr 2019 15:14:44 -0700
Subject: [PATCH 0594/1861] Input: snvs_pwrkey - initialize necessary driver
 data before enabling IRQ

The SNVS IRQ is requested before the necessary driver data is
initialized. If there is a pending IRQ during the driver probe phase, a
kernel NULL pointer panic will occur in the IRQ handler. To avoid such
a scenario, just initialize the necessary driver data before enabling
the IRQ. This patch is inspired by NXP's internal kernel tree.
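The underlying rule generalizes beyond this driver. As a hedged sketch of the race (demo_* names are invented; this is not the snvs code, and includes are elided), any data an IRQ handler dereferences must be valid before the IRQ is requested, because the handler can run immediately:

    struct demo_priv {
            struct input_dev *input;
    };

    static irqreturn_t demo_irq(int irq, void *dev_id)
    {
            struct platform_device *pdev = dev_id;
            struct demo_priv *priv = platform_get_drvdata(pdev);

            /* Crashes on a NULL priv if probe requested the IRQ
             * before calling platform_set_drvdata(). */
            input_report_key(priv->input, KEY_POWER, 1);
            return IRQ_HANDLED;
    }

The fix below applies exactly this ordering: set the drvdata first, then call devm_request_irq().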
Fixes: d3dc6e232215 ("input: keyboard: imx: add snvs power key driver") Signed-off-by: Anson Huang Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/snvs_pwrkey.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index effb63205d3d7..4c67cf30a5d9a 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -148,6 +148,9 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; } + pdata->input = input; + platform_set_drvdata(pdev, pdata); + error = devm_request_irq(&pdev->dev, pdata->irq, imx_snvs_pwrkey_interrupt, 0, pdev->name, pdev); @@ -163,9 +166,6 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) return error; } - pdata->input = input; - platform_set_drvdata(pdev, pdata); - device_init_wakeup(&pdev->dev, pdata->wakeup); return 0; -- GitLab From 7f1a93b1f1d1d2603a49a9e4226259db9272f305 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 25 Mar 2019 16:29:19 +0800 Subject: [PATCH 0595/1861] drm/i915/gvt: Correct the calculation of plane size The stride isn't in units of pixels, it is in bytes, so the plane size calculation doesn't need to multiply by bpp. Fixes: e546e281d33d ("drm/i915/gvt: Dmabuf support for GVT-g") Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 3e7e2b80c8579..5d887f7cc0d5c 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -238,9 +238,6 @@ static int vgpu_get_plane_info(struct drm_device *dev, default: gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled); } - - info->size = (((p.stride * p.height * p.bpp) / 8) + - (PAGE_SIZE - 1)) >> PAGE_SHIFT; } else if (plane_id == DRM_PLANE_TYPE_CURSOR) { ret = intel_vgpu_decode_cursor_plane(vgpu, &c); if (ret) @@ -262,14 +259,13 @@ static int vgpu_get_plane_info(struct drm_device *dev, info->x_hot = UINT_MAX; info->y_hot = UINT_MAX; } - - info->size = (((info->stride * c.height * c.bpp) / 8) - + (PAGE_SIZE - 1)) >> PAGE_SHIFT; } else { gvt_vgpu_err("invalid plane id:%d\n", plane_id); return -EINVAL; } + info->size = (info->stride * info->height + PAGE_SIZE - 1) + >> PAGE_SHIFT; if (info->size == 0) { gvt_vgpu_err("fb size is zero\n"); return -EINVAL; -- GitLab From cf9ed66671ec5f6cacc7b6efbad9d7c9e5e31776 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Feb 2019 20:30:33 +0000 Subject: [PATCH 0596/1861] drm/i915/gvt: Fix kerneldoc typo for intel_vgpu_emulate_hotplug drivers/gpu/drm/i915/gvt/display.c:457: warning: Function parameter or member 'connected' not described in 'intel_vgpu_emulate_hotplug' drivers/gpu/drm/i915/gvt/display.c:457: warning: Excess function parameter 'conncted' description in 'intel_vgpu_emulate_hotplug' Fixes: 1ca20f33df42 ("drm/i915/gvt: add hotplug emulation") Signed-off-by: Chris Wilson Cc: Hang Yuan Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 035479e273bec..e3f9caa7839f7 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -448,7 +448,7 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) /** * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU * @vgpu: a
vGPU - * @conncted: link state + * @connected: link state * * This function is used to trigger hotplug interrupt for vGPU * -- GitLab From 27fad74a5a77fe2e1f876db7bf27efcf2ec304b2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 4 Apr 2019 10:31:14 +0800 Subject: [PATCH 0597/1861] iov_iter: Fix build error without CONFIG_CRYPTO If CONFIG_CRYPTO is not set or set to m, building with gcc warns: lib/iov_iter.o: In function `hash_and_copy_to_iter': iov_iter.c:(.text+0x9129): undefined reference to `crypto_stats_get' iov_iter.c:(.text+0x9152): undefined reference to `crypto_stats_ahash_update' Reported-by: Hulk Robot Fixes: d05f443554b3 ("iov_iter: introduce hash_and_copy_to_iter helper") Suggested-by: Al Viro Signed-off-by: YueHaibing Signed-off-by: Al Viro --- lib/iov_iter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ea36dc355da13..b396d328a7643 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1528,6 +1528,7 @@ EXPORT_SYMBOL(csum_and_copy_to_iter); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i) { +#ifdef CONFIG_CRYPTO struct ahash_request *hash = hashp; struct scatterlist sg; size_t copied; @@ -1537,6 +1538,9 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, ahash_request_set_crypt(hash, &sg, NULL, copied); crypto_ahash_update(hash); return copied; +#else + return 0; +#endif } EXPORT_SYMBOL(hash_and_copy_to_iter); -- GitLab From 1cb1d2c64e812928fe0a40b8f7e74523d0283dbe Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Sat, 30 Mar 2019 15:43:31 +0100 Subject: [PATCH 0598/1861] scsi: core: add new RDAC LENOVO/DE_Series device Blacklist "Universal Xport" LUN. It's used for in-band storage array management. Also add the model to the rdac dh family. Cc: Martin Wilck Cc: Hannes Reinecke Cc: NetApp RDAC team Cc: Christophe Varoqui Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: DM ML Signed-off-by: Xose Vazquez Perez Reviewed-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + drivers/scsi/scsi_dh.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index c4cbfd07b9167..a08ff3bd63105 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -238,6 +238,7 @@ static struct { {"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 5a58cbf3a75da..c14006ac98f91 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -75,6 +75,7 @@ static const struct scsi_dh_blist scsi_dh_blist[] = { {"NETAPP", "INF-01-00", "rdac", }, {"LSI", "INF-01-00", "rdac", }, {"ENGENIO", "INF-01-00", "rdac", }, + {"LENOVO", "DE_Series", "rdac", }, {NULL, NULL, NULL }, }; -- GitLab From 382e06d11e075a40b4094b6ef809f8d4bcc7ab2a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 16:10:52 +0000 Subject: [PATCH 0599/1861] scsi: storvsc: Fix calculation of sub-channel count When the number of sub-channels offered by Hyper-V is >= the number of CPUs in the VM, calculate the correct number of sub-channels.
The current code produces one too many. This scenario arises only when the number of CPUs is artificially restricted (for example, with maxcpus= on the kernel boot line), because Hyper-V normally offers a sub-channel count < number of CPUs. While the current code doesn't break, the extra sub-channel is unbalanced across the CPUs (for example, a total of 5 channels on a VM with 4 CPUs). Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Reviewed-by: Long Li Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 84380bae20f1e..e186743033f48 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -668,13 +668,22 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) { struct device *dev = &device->device; struct storvsc_device *stor_device; - int num_cpus = num_online_cpus(); int num_sc; struct storvsc_cmd_request *request; struct vstor_packet *vstor_packet; int ret, t; - num_sc = ((max_chns > num_cpus) ? num_cpus : max_chns); + /* + * If the number of CPUs is artificially restricted, such as + * with maxcpus=1 on the kernel boot line, Hyper-V could offer + * sub-channels >= the number of CPUs. These sub-channels + * should not be created. The primary channel is already created + * and assigned to one CPU, so check against # CPUs - 1. + */ + num_sc = min((int)(num_online_cpus() - 1), max_chns); + if (!num_sc) + return; + stor_device = get_out_stor_device(device); if (!stor_device) return; -- GitLab From ed2e63aaec4fbf248275d2d83c4cfd65e069594f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 1 Apr 2019 21:42:06 +0000 Subject: [PATCH 0600/1861] scsi: storvsc: Reduce default ring buffer size to 128 Kbytes Reduce the default VMbus channel ring buffer size for storvsc SCSI devices from 1 Mbyte to 128 Kbytes. Measurements show that ring buffer sizes above 128 Kbytes do not increase performance even at very high IOPS rates, so don't waste the memory. Also remove the dependence on PAGE_SIZE, since the ring buffer size should not change on architectures where PAGE_SIZE is not 4 Kbytes. Signed-off-by: Michael Kelley Reviewed-by: Haiyang Zhang Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e186743033f48..8472de1007fff 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -385,7 +385,7 @@ enum storvsc_request_type { * This is the end of Protocol specific defines. */ -static int storvsc_ringbuffer_size = (256 * PAGE_SIZE); +static int storvsc_ringbuffer_size = (128 * 1024); static u32 max_outstanding_req_per_channel; static int storvsc_vcpus_per_sub_channel = 4; -- GitLab From 4eb01535886644d79a58fe652e0782a194bc3402 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 3 Apr 2019 11:10:34 -0700 Subject: [PATCH 0601/1861] scsi: lpfc: Fix missing wakeups on abort threads Abort thread wakeups, on some wqe types, are not happening. The thread wakeup logic is dependent upon the LPFC_DRIVER_ABORTED flag. However, on these wqes, the completion handler running prior to the io completion routine ends up clearing the flag. Rework the wakeup logic to look at a non-null waitq element which must be set if the abort thread is waiting. This is reverting the change in the indicated patch. 
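The shape of the lpfc fix, waking on the waiter's presence rather than on a status flag that another path may already have cleared, can be sketched with pthreads. This is a userspace analogue under that assumption, not the lpfc code itself:

#include <pthread.h>
#include <stdio.h>

struct cmd {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        pthread_cond_t *waitq;  /* non-NULL only while a thread waits */
        int done;
};

static void *complete_cmd(void *arg)
{
        struct cmd *c = arg;

        pthread_mutex_lock(&c->lock);
        c->done = 1;
        /* Key the wakeup off the waiter itself; a flag such as
         * LPFC_DRIVER_ABORTED may already have been cleared by an
         * earlier completion handler. */
        if (c->waitq) {
                pthread_cond_signal(c->waitq);
                c->waitq = NULL;
        }
        pthread_mutex_unlock(&c->lock);
        return NULL;
}

int main(void)
{
        struct cmd c = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .cond = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        pthread_mutex_lock(&c.lock);
        c.waitq = &c.cond;      /* announce that we are waiting */
        pthread_create(&t, NULL, complete_cmd, &c);
        while (!c.done)
                pthread_cond_wait(&c.cond, &c.lock);
        pthread_mutex_unlock(&c.lock);
        pthread_join(t, NULL);
        printf("abort thread woken\n");
        return 0;
}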
Fixes: c2017260eea2d ("scsi: lpfc: Rework locking on SCSI io completion") Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c98f264f1d83a..a497b2c0cb798 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3878,10 +3878,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, * wake up the thread. */ spin_lock(&lpfc_cmd->buf_lock); - if (unlikely(lpfc_cmd->cur_iocbq.iocb_flag & LPFC_DRIVER_ABORTED)) { - lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; - if (lpfc_cmd->waitq) - wake_up(lpfc_cmd->waitq); + lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; + if (lpfc_cmd->waitq) { + wake_up(lpfc_cmd->waitq); lpfc_cmd->waitq = NULL; } spin_unlock(&lpfc_cmd->buf_lock); -- GitLab From 0ab03f353d3613ea49d1f924faf98559003670a8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 2 Apr 2019 08:16:03 +0200 Subject: [PATCH 0602/1861] net-gro: Fix GRO flush when receiving a GSO packet. Currently we may incorrectly merge a received GSO packet or a packet with a frag_list into a packet sitting in the gro_hash list. skb_segment() may crash in this case because the assumptions about the skb layout are not met. The correct behaviour would be to flush the packet in the gro_hash list and send the received GSO packet directly afterwards. Commit d61d072e87c8e ("net-gro: avoid reorders") sets NAPI_GRO_CB(skb)->flush in this case, but this is not checked before merging. This patch makes sure to check this flag and to not merge in that case. Fixes: d61d072e87c8e ("net-gro: avoid reorders") Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2415d9cb9b89f..ef2cd57120989 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3801,7 +3801,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int delta_truesize; struct sk_buff *lp; - if (unlikely(p->len + len >= 65536)) + if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; -- GitLab From ef0efcd3bd3fd0589732b67fb586ffd3c8705806 Mon Sep 17 00:00:00 2001 From: Junwei Hu Date: Tue, 2 Apr 2019 19:38:04 +0800 Subject: [PATCH 0603/1861] ipv6: Fix dangling pointer when ipv6 fragment At the beginning of ip6_fragment func, the prevhdr pointer is obtained in the ip6_find_1stfragopt func. However, all the pointers pointing into the skb header may change when calling skb_checksum_help func with the skb->ip_summed = CHECKSUM_PARTIAL condition. The prevhdr pointer will be dangling if it is not reloaded after calling __skb_linearize func in skb_checksum_help func. Here, I add a variable, nexthdr_offset, to evaluate the offset, which does not change even after calling __skb_linearize func. Fixes: 405c92f7a541 ("ipv6: add defensive check for CHECKSUM_PARTIAL skbs in ip_fragment") Signed-off-by: Junwei Hu Reported-by: Wenhao Zhang Reported-by: syzbot+e8ce541d095e486074fc@syzkaller.appspotmail.com Reviewed-by: Zhiqiang Liu Acked-by: Martin KaFai Lau Signed-off-by: David S.
Miller --- net/ipv6/ip6_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index edbd12067170b..e51f3c648b094 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -601,7 +601,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -612,6 +612,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -646,6 +647,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); -- GitLab From 7297ba6c74c5b9e78d8e936af82eecfcf7d32dfb Mon Sep 17 00:00:00 2001 From: Annaliese McDermond Date: Wed, 3 Apr 2019 21:17:15 -0700 Subject: [PATCH 0604/1861] ASoC: tlv320aic32x4: Change author's name The author of these files has changed her name. Update instances in the code of her dead name to her current legal name. Signed-off-by: Annaliese McDermond Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4-i2c.c | 4 ++-- sound/soc/codecs/tlv320aic32x4-spi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4-i2c.c b/sound/soc/codecs/tlv320aic32x4-i2c.c index 385fa2e9525ab..22c3a6bc0b6c4 100644 --- a/sound/soc/codecs/tlv320aic32x4-i2c.c +++ b/sound/soc/codecs/tlv320aic32x4-i2c.c @@ -3,7 +3,7 @@ * * Copyright 2011 NW Digital Radio * - * Author: Jeremy McDermond + * Author: Annaliese McDermond * * Based on sound/soc/codecs/wm8974 and TI driver for kernel 2.6.27. * @@ -72,5 +72,5 @@ static struct i2c_driver aic32x4_i2c_driver = { module_i2c_driver(aic32x4_i2c_driver); MODULE_DESCRIPTION("ASoC TLV320AIC32x4 codec driver I2C"); -MODULE_AUTHOR("Jeremy McDermond "); +MODULE_AUTHOR("Annaliese McDermond "); MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/tlv320aic32x4-spi.c b/sound/soc/codecs/tlv320aic32x4-spi.c index 07d78ae51e05c..aa5b7ba0254bc 100644 --- a/sound/soc/codecs/tlv320aic32x4-spi.c +++ b/sound/soc/codecs/tlv320aic32x4-spi.c @@ -3,7 +3,7 @@ * * Copyright 2011 NW Digital Radio * - * Author: Jeremy McDermond + * Author: Annaliese McDermond * * Based on sound/soc/codecs/wm8974 and TI driver for kernel 2.6.27. * @@ -74,5 +74,5 @@ static struct spi_driver aic32x4_spi_driver = { module_spi_driver(aic32x4_spi_driver); MODULE_DESCRIPTION("ASoC TLV320AIC32x4 codec driver SPI"); -MODULE_AUTHOR("Jeremy McDermond "); +MODULE_AUTHOR("Annaliese McDermond "); MODULE_LICENSE("GPL"); -- GitLab From 2201f31f2c6d6030cbd2f7085455e2172725b1c5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:19:25 -0700 Subject: [PATCH 0605/1861] xtensa: use actual syscall number in do_syscall_trace_leave A syscall may alter the pt_regs structure passed to it, resulting in a mismatch between syscall entry and syscall exit entries in ftrace. Temporarily restore the syscall field of pt_regs for the duration of do_syscall_trace_leave.
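In C terms, the fix amounts to snapshotting the field before dispatch and swapping it back in around the tracer call. A hedged sketch of that pattern follows (plain C, not the xtensa assembly itself; the struct is invented for illustration):

#include <stdio.h>

struct fake_regs {
        long syscall;
};

static void trace_leave(const struct fake_regs *r)
{
        printf("sys_exit: %ld\n", r->syscall);  /* tracer reads the field */
}

int main(void)
{
        struct fake_regs regs = { .syscall = 42 };
        long saved = regs.syscall;      /* snapshot before dispatch */

        regs.syscall = -1;              /* the syscall may clobber pt_regs */

        long clobbered = regs.syscall;
        regs.syscall = saved;           /* restore for the tracer... */
        trace_leave(&regs);
        regs.syscall = clobbered;       /* ...then put the new value back */
        return 0;
}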
Signed-off-by: Max Filippov --- arch/xtensa/kernel/entry.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index e50f5124dc6f7..e54af8b7e0f8c 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1860,6 +1860,8 @@ ENTRY(system_call) l32i a7, a2, PT_SYSCALL 1: + s32i a7, a1, 4 + /* syscall = sys_call_table[syscall_nr] */ movi a4, sys_call_table @@ -1893,8 +1895,12 @@ ENTRY(system_call) retw 1: + l32i a4, a1, 4 + l32i a3, a2, PT_SYSCALL + s32i a4, a2, PT_SYSCALL mov a6, a2 call4 do_syscall_trace_leave + s32i a3, a2, PT_SYSCALL retw ENDPROC(system_call) -- GitLab From 2663147dc7465cb29040a05cc4286fdd839978b5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 3 Apr 2019 20:22:42 -0700 Subject: [PATCH 0606/1861] xtensa: fix initialization of pt_regs::syscall in start_thread New pt_regs should indicate that there's no syscall, not that there's syscall #0. While at it wrap macro body in do/while and parenthesize macro arguments. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index f7dd895b2353e..0c14018d1c260 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -187,15 +187,18 @@ struct thread_struct { /* Clearing a0 terminates the backtrace. */ #define start_thread(regs, new_pc, new_sp) \ - memset(regs, 0, sizeof(*regs)); \ - regs->pc = new_pc; \ - regs->ps = USER_PS_VALUE; \ - regs->areg[1] = new_sp; \ - regs->areg[0] = 0; \ - regs->wmask = 1; \ - regs->depc = 0; \ - regs->windowbase = 0; \ - regs->windowstart = 1; + do { \ + memset((regs), 0, sizeof(*(regs))); \ + (regs)->pc = (new_pc); \ + (regs)->ps = USER_PS_VALUE; \ + (regs)->areg[1] = (new_sp); \ + (regs)->areg[0] = 0; \ + (regs)->wmask = 1; \ + (regs)->depc = 0; \ + (regs)->windowbase = 0; \ + (regs)->windowstart = 1; \ + (regs)->syscall = NO_SYSCALL; \ + } while (0) /* Forward declaration */ struct task_struct; -- GitLab From 1054e4dd1cbbb7bf5cc836832648f9972134ef67 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 3 Apr 2019 17:14:04 +0200 Subject: [PATCH 0607/1861] clk: sunxi-ng: nkmp: Explain why zero width check is needed Add an explanation why zero width check is needed when generating factor mask using GENMASK() macro. Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_nkmp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c index 69dfc6de1c4e6..cbcdf664f3360 100644 --- a/drivers/clk/sunxi-ng/ccu_nkmp.c +++ b/drivers/clk/sunxi-ng/ccu_nkmp.c @@ -186,6 +186,12 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate, ccu_nkmp_find_best(parent_rate, rate, &_nkmp); + /* + * If width is 0, GENMASK() macro may not generate expected mask (0) + * as it falls under undefined behaviour by C standard due to shifts + * which are equal or greater than width of left operand. This can + * be easily avoided by explicitly checking if width is 0. 
+ */ if (nkmp->n.width) n_mask = GENMASK(nkmp->n.width + nkmp->n.shift - 1, nkmp->n.shift); -- GitLab From c2c616021d64d952dc9d37793924ce57833d7754 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Apr 2019 09:52:15 +0900 Subject: [PATCH 0608/1861] ASoC: audio-graph-card: don't select DPCM via audio-graph-card commit ae3cb5790906b ("ASoC: audio-graph-card: merge audio-graph-scu-card") merged audio-graph-scu-card which can handle DPCM into audio-graph-card. By this patch, the judgement to select "normal sound card" or "DPCM sound card" is based on its OF-graph endpoint connection. But, because of it, existing "audio-graph-card" users who assume a "normal sound card" might select DPCM unintentionally. To solve this issue, this patch lets an "audio-graph-card" user select a "normal sound card", and an "audio-graph-scu-card" user select both a "normal sound card" and a "DPCM sound card". This keeps compatibility correctly. Fixes: ae3cb5790906b ("ASoC: audio-graph-card: merge audio-graph-scu-card") Reported-by: Arnaud Pouliquen Signed-off-by: Kuninori Morimoto Acked-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index bb12351330e8c..69bc4848d7876 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -20,6 +20,8 @@ #include #include +#define DPCM_SELECTABLE 1 + struct graph_priv { struct snd_soc_card snd_card; struct graph_dai_props { @@ -440,6 +442,7 @@ static int graph_for_each_link(struct graph_priv *priv, struct device_node *codec_port; struct device_node *codec_port_old = NULL; struct asoc_simple_card_data adata; + uintptr_t dpcm_selectable = (uintptr_t)of_device_get_match_data(dev); int rc, ret; /* loop for all listed CPU port */ @@ -470,8 +473,9 @@ static int graph_for_each_link(struct graph_priv *priv, * if Codec port has many endpoints, * or has convert-xxx property */ - if ((of_get_child_count(codec_port) > 1) || - adata.convert_rate || adata.convert_channels) + if (dpcm_selectable && + ((of_get_child_count(codec_port) > 1) || + adata.convert_rate || adata.convert_channels)) ret = func_dpcm(priv, cpu_ep, codec_ep, li, (codec_port_old == codec_port)); /* else normal sound */ @@ -732,7 +736,8 @@ static int graph_remove(struct platform_device *pdev) static const struct of_device_id graph_of_match[] = { { .compatible = "audio-graph-card", }, - { .compatible = "audio-graph-scu-card", }, + { .compatible = "audio-graph-scu-card", + .data = (void *)DPCM_SELECTABLE }, {}, }; MODULE_DEVICE_TABLE(of, graph_of_match); -- GitLab From 42bf029a55a9bb8036f1d738a28dba2f7ec1e79d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Apr 2019 09:52:52 +0900 Subject: [PATCH 0609/1861] ASoC: simple-card: don't select DPCM via simple-audio-card commit da215354eb55c ("ASoC: simple-card: merge simple-scu-card") merged simple-scu-audio-card which can handle DPCM into simple-audio-card. By this patch, the judgement to select "normal sound card" or "DPCM sound card" is based on its CPU/Codec DAI count. But, because of it, existing "simple-audio-card" users who assume a "normal sound card" might select DPCM unintentionally. To solve this issue, this patch lets a "simple-audio-card" user select a "normal sound card", and a "simple-scu-audio-card" user select both a "normal sound card" and a "DPCM sound card". This keeps compatibility correctly.
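Both of these patches rely on smuggling a boolean through the OF match table's data pointer and reading it back with of_device_get_match_data(). The pattern, reduced to standalone C (the table contents here are illustrative only):

#include <stdio.h>
#include <stdint.h>

#define DPCM_SELECTABLE 1

struct match {
        const char *compatible;
        const void *data;
};

static const struct match table[] = {
        { "simple-audio-card", NULL },
        { "simple-scu-audio-card", (void *)(uintptr_t)DPCM_SELECTABLE },
};

int main(void)
{
        for (unsigned int i = 0; i < 2; i++) {
                /* The flag rides in the pointer value itself, so it is
                 * round-tripped through uintptr_t, never dereferenced. */
                uintptr_t selectable = (uintptr_t)table[i].data;

                printf("%s: DPCM %s\n", table[i].compatible,
                       selectable ? "selectable" : "never selected");
        }
        return 0;
}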
Fixes: da215354eb55c ("ASoC: simple-card: merge simple-scu-card") Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 7147bba45a2a6..34de32efc4c4d 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -9,12 +9,15 @@ #include #include #include +#include #include #include #include #include #include +#define DPCM_SELECTABLE 1 + struct simple_priv { struct snd_soc_card snd_card; struct simple_dai_props { @@ -441,6 +444,7 @@ static int simple_for_each_link(struct simple_priv *priv, struct device *dev = simple_priv_to_dev(priv); struct device_node *top = dev->of_node; struct device_node *node; + uintptr_t dpcm_selectable = (uintptr_t)of_device_get_match_data(dev); bool is_top = 0; int ret = 0; @@ -480,8 +484,9 @@ static int simple_for_each_link(struct simple_priv *priv, * if it has many CPUs, * or has convert-xxx property */ - if (num > 2 || - adata.convert_rate || adata.convert_channels) + if (dpcm_selectable && + (num > 2 || + adata.convert_rate || adata.convert_channels)) ret = func_dpcm(priv, np, codec, li, is_top); /* else normal sound */ else @@ -822,7 +827,8 @@ static int simple_remove(struct platform_device *pdev) static const struct of_device_id simple_of_match[] = { { .compatible = "simple-audio-card", }, - { .compatible = "simple-scu-audio-card", }, + { .compatible = "simple-scu-audio-card", + .data = (void *)DPCM_SELECTABLE }, {}, }; MODULE_DEVICE_TABLE(of, simple_of_match); -- GitLab From c85064435fe7a216ec0f0238ef2b8f7cd850a450 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Wed, 3 Apr 2019 21:40:45 +0800 Subject: [PATCH 0610/1861] ASoC: rockchip: pdm: fix regmap_ops hang issue This is because the set_fmt ops may be called when the PD is off, and in such a case, regmap ops will lead to a system hang. Enable the PD before doing regmap ops. Signed-off-by: Sugar Zhang Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_pdm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 400e29edb1c9c..8a2e3bbce3a16 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -208,7 +208,9 @@ static int rockchip_pdm_set_fmt(struct snd_soc_dai *cpu_dai, return -EINVAL; } + pm_runtime_get_sync(cpu_dai->dev); regmap_update_bits(pdm->regmap, PDM_CLK_CTRL, mask, val); + pm_runtime_put(cpu_dai->dev); return 0; } -- GitLab From 86a7b6ffd90095d81d9fa0d8b48955b7c83b2e2f Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Thu, 4 Apr 2019 11:48:11 +0800 Subject: [PATCH 0611/1861] ASoC: rockchip: pdm: change dma burst to 8 This patch decreases the transfer bursts to avoid the fifo overrun.
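The arithmetic behind the change is simple: the burst length times the word width gives the bytes moved per DMA transaction, and halving the burst halves how much room the FIFO must have free when a request fires. Illustrative only; the actual FIFO depth is not stated in the patch:

#include <stdio.h>

int main(void)
{
        unsigned int width = 4;         /* bytes per word, per the comment */
        unsigned int old_burst = 16, new_burst = 8;

        printf("old: %u bytes per burst\n", old_burst * width); /* 64 */
        printf("new: %u bytes per burst\n", new_burst * width); /* 32 */
        return 0;
}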
Signed-off-by: Sugar Zhang Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_pdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c index 8a2e3bbce3a16..d0b403a0e27b8 100644 --- a/sound/soc/rockchip/rockchip_pdm.c +++ b/sound/soc/rockchip/rockchip_pdm.c @@ -24,7 +24,7 @@ #include "rockchip_pdm.h" -#define PDM_DMA_BURST_SIZE (16) /* size * width: 16*4 = 64 bytes */ +#define PDM_DMA_BURST_SIZE (8) /* size * width: 8*4 = 32 bytes */ struct rk_pdm_dev { struct device *dev; -- GitLab From a3f98bb22cbfaaf67717e156f79e2bfeb42d4cac Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 3 Apr 2019 16:56:45 -0400 Subject: [PATCH 0612/1861] Documentation/gpu/meson: Remove link to meson_canvas.c The file was removed in the below patch and is causing this error: WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -function Canvas ../drivers/gpu/drm/meson/meson_canvas.c' failed with return code Fixes: 2bf6b5b0e374 ("drm/meson: exclusively use the canvas provider module") Cc: Maxime Jourdan Cc: Neil Armstrong Cc: Kevin Hilman Cc: dri-devel@lists.freedesktop.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Sean Paul Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190403205652.183496-1-sean@poorly.run --- Documentation/gpu/meson.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/gpu/meson.rst b/Documentation/gpu/meson.rst index 479f6f51a13b0..b9e2f9aa3bd80 100644 --- a/Documentation/gpu/meson.rst +++ b/Documentation/gpu/meson.rst @@ -42,12 +42,6 @@ Video Encoder .. kernel-doc:: drivers/gpu/drm/meson/meson_venc.c :doc: Video Encoder -Video Canvas Management -======================= - -.. kernel-doc:: drivers/gpu/drm/meson/meson_canvas.c - :doc: Canvas - Video Clocks ============ -- GitLab From ba5e60c9b75dec92d4c695b928f69300b17d7686 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Tue, 2 Apr 2019 22:12:38 +0800 Subject: [PATCH 0613/1861] arm/mach-at91/pm : fix possible object reference leak of_find_device_by_node() takes a reference to the struct device when it finds a match via get_device(). When returning an error we should call put_device().
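The two-label error unwind the fix introduces is a standard kernel idiom: one label for paths taken before the reference exists, one for paths after. Below is a self-contained C model of it, with a toy refcount standing in for struct device; all names are illustrative:

#include <stdio.h>
#include <stdlib.h>

struct toy_device {
        int refcount;
};

static struct toy_device *find_device(void)
{
        struct toy_device *d = malloc(sizeof(*d));

        if (d)
                d->refcount = 1;        /* lookups return a reference */
        return d;
}

static void put_device(struct toy_device *d)
{
        if (d && --d->refcount == 0)
                free(d);
}

static int backup_init(int fail_after_lookup)
{
        struct toy_device *dev = find_device();

        if (!dev)
                goto fail_no_ref;       /* no reference taken yet */

        if (fail_after_lookup)
                goto fail;              /* must drop the lookup reference */

        put_device(dev);
        return 0;

fail:
        put_device(dev);                /* mirrors the added put_device() */
fail_no_ref:
        return -1;
}

int main(void)
{
        printf("late failure: %d\n", backup_init(1));
        printf("success: %d\n", backup_init(0));
        return 0;
}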
Reviewed-by: Mukesh Ojha Signed-off-by: Peng Hao Signed-off-by: Ludovic Desroches --- arch/arm/mach-at91/pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 51e808adb00cc..2a757dcaa1a5e 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -591,13 +591,13 @@ static int __init at91_pm_backup_init(void) np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam"); if (!np) - goto securam_fail; + goto securam_fail_no_ref_dev; pdev = of_find_device_by_node(np); of_node_put(np); if (!pdev) { pr_warn("%s: failed to find securam device!\n", __func__); - goto securam_fail; + goto securam_fail_no_ref_dev; } sram_pool = gen_pool_get(&pdev->dev, NULL); @@ -620,6 +620,8 @@ static int __init at91_pm_backup_init(void) return 0; securam_fail: + put_device(&pdev->dev); +securam_fail_no_ref_dev: iounmap(pm_data.sfrbu); pm_data.sfrbu = NULL; return ret; -- GitLab From 6b0868c820ff7370d15d6ddfe71b1ce6bbe8a25d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 4 Apr 2019 11:54:09 +0100 Subject: [PATCH 0614/1861] mm/compaction.c: correct zone boundary handling when resetting pageblock skip hints Mikhail Gavrilo reported the following bug being triggered in a Fedora kernel based on 5.1-rc1 but it is relevant to a vanilla kernel. kernel: page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) kernel: ------------[ cut here ]------------ kernel: kernel BUG at include/linux/mm.h:1021! kernel: invalid opcode: 0000 [#1] SMP NOPTI kernel: CPU: 6 PID: 116 Comm: kswapd0 Tainted: G C 5.1.0-0.rc1.git1.3.fc31.x86_64 #1 kernel: Hardware name: System manufacturer System Product Name/ROG STRIX X470-I GAMING, BIOS 1201 12/07/2018 kernel: RIP: 0010:__reset_isolation_pfn+0x244/0x2b0 kernel: Code: fe 06 e8 0f 8e fc ff 44 0f b6 4c 24 04 48 85 c0 0f 85 dc fe ff ff e9 68 fe ff ff 48 c7 c6 58 b7 2e 8c 4c 89 ff e8 0c 75 00 00 <0f> 0b 48 c7 c6 58 b7 2e 8c e8 fe 74 00 00 0f 0b 48 89 fa 41 b8 01 kernel: RSP: 0018:ffff9e2d03f0fde8 EFLAGS: 00010246 kernel: RAX: 0000000000000034 RBX: 000000000081f380 RCX: ffff8cffbddd6c20 kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8cffbddd6c20 kernel: RBP: 0000000000000001 R08: 0000009898b94613 R09: 0000000000000000 kernel: R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000100000 kernel: R13: 0000000000100000 R14: 0000000000000001 R15: ffffca7de07ce000 kernel: FS: 0000000000000000(0000) GS:ffff8cffbdc00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fc1670e9000 CR3: 00000007f5276000 CR4: 00000000003406e0 kernel: Call Trace: kernel: __reset_isolation_suitable+0x62/0x120 kernel: reset_isolation_suitable+0x3b/0x40 kernel: kswapd+0x147/0x540 kernel: ? finish_wait+0x90/0x90 kernel: kthread+0x108/0x140 kernel: ? balance_pgdat+0x560/0x560 kernel: ? kthread_park+0x90/0x90 kernel: ret_from_fork+0x27/0x50 He bisected it down to e332f741a8dd ("mm, compaction: be selective about what pageblocks to clear skip hints"). The problem is that the patch in question was sloppy with respect to the handling of zone boundaries. In some instances, it was possible for PFNs outside of a zone to be examined and if those were not properly initialised or poisoned then it would trigger the VM_BUG_ON. This patch corrects the zone boundary issues when resetting pageblock skip hints and Mikhail reported that the bug did not trigger after 30 hours of testing. 
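The corrected boundary handling clamps the pageblock's first and last PFN into the zone before touching any struct page. The clamping itself is just max()/min(), as this small demo shows (zone and pageblock numbers are invented):

#include <stdio.h>

#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        unsigned long zone_start = 1000, zone_end = 5000; /* [start, end) */
        unsigned long pageblock_nr = 512;
        unsigned long pfn = 1100;

        /* Round down to the pageblock, but never below the zone start. */
        unsigned long first = max(pfn & ~(pageblock_nr - 1), zone_start);
        /* Step to the next pageblock, but never past the last valid pfn. */
        unsigned long last = min((pfn & ~(pageblock_nr - 1)) + pageblock_nr,
                                 zone_end - 1);

        printf("scan [%lu, %lu]\n", first, last);
        return 0;
}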
Link: http://lkml.kernel.org/r/20190327085424.GL3189@techsingularity.net Fixes: e332f741a8dd ("mm, compaction: be selective about what pageblocks to clear skip hints") Reported-by: Mikhail Gavrilov Tested-by: Mikhail Gavrilov Cc: Daniel Jordan Cc: Qian Cai Cc: Vlastimil Babka Signed-off-by: Mel Gorman --- mm/compaction.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index f171a83707ced..b4930bf93c8ac 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -242,6 +242,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, bool check_target) { struct page *page = pfn_to_online_page(pfn); + struct page *block_page; struct page *end_page; unsigned long block_pfn; @@ -267,20 +268,26 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, get_pageblock_migratetype(page) != MIGRATE_MOVABLE) return false; + /* Ensure the start of the pageblock or zone is online and valid */ + block_pfn = pageblock_start_pfn(pfn); + block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn)); + if (block_page) { + page = block_page; + pfn = block_pfn; + } + + /* Ensure the end of the pageblock or zone is online and valid */ + block_pfn += pageblock_nr_pages; + block_pfn = min(block_pfn, zone_end_pfn(zone) - 1); + end_page = pfn_to_online_page(block_pfn); + if (!end_page) + return false; + /* * Only clear the hint if a sample indicates there is either a * free page or an LRU page in the block. One or other condition * is necessary for the block to be a migration source/target. */ - block_pfn = pageblock_start_pfn(pfn); - pfn = max(block_pfn, zone->zone_start_pfn); - page = pfn_to_page(pfn); - if (zone != page_zone(page)) - return false; - pfn = block_pfn + pageblock_nr_pages; - pfn = min(pfn, zone_end_pfn(zone)); - end_page = pfn_to_page(pfn); - do { if (pfn_valid_within(pfn)) { if (check_source && PageLRU(page)) { @@ -309,7 +316,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, static void __reset_isolation_suitable(struct zone *zone) { unsigned long migrate_pfn = zone->zone_start_pfn; - unsigned long free_pfn = zone_end_pfn(zone); + unsigned long free_pfn = zone_end_pfn(zone) - 1; unsigned long reset_migrate = free_pfn; unsigned long reset_free = migrate_pfn; bool source_set = false; -- GitLab From 5b56d996dd50a9d2ca87c25ebb50c07b255b7e04 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 4 Apr 2019 11:54:41 +0100 Subject: [PATCH 0615/1861] mm/compaction.c: abort search if isolation fails Running LTP oom01 in a tight loop or memory stress testing can put the system in a low-memory situation that could trigger random memory corruption, like the page flag corruption below, because in fast_isolate_freepages(), if isolation fails, next_search_order() does not abort the search immediately, which could lead to improper accesses. UBSAN: Undefined behaviour in ./include/linux/mm.h:1195:50 index 7 is out of range for type 'zone [5]' Call Trace: dump_stack+0x62/0x9a ubsan_epilogue+0xd/0x7f __ubsan_handle_out_of_bounds+0x14d/0x192 __isolate_free_page+0x52c/0x600 compaction_alloc+0x886/0x25f0 unmap_and_move+0x37/0x1e70 migrate_pages+0x2ca/0xb20 compact_zone+0x19cb/0x3620 kcompactd_do_work+0x2df/0x680 kcompactd+0x1d8/0x6c0 kthread+0x32c/0x3f0 ret_from_fork+0x35/0x40 ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:3124!
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI RIP: 0010:__isolate_free_page+0x464/0x600 RSP: 0000:ffff888b9e1af848 EFLAGS: 00010007 RAX: 0000000030000000 RBX: ffff888c39fcf0f8 RCX: 0000000000000000 RDX: 1ffff111873f9e25 RSI: 0000000000000004 RDI: ffffed1173c35ef6 RBP: ffff888b9e1af898 R08: fffffbfff4fc2461 R09: fffffbfff4fc2460 R10: fffffbfff4fc2460 R11: ffffffffa7e12303 R12: 0000000000000008 R13: dffffc0000000000 R14: 0000000000000000 R15: 0000000000000007 FS: 0000000000000000(0000) GS:ffff888ba8e80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc7abc00000 CR3: 0000000752416004 CR4: 00000000001606a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: compaction_alloc+0x886/0x25f0 unmap_and_move+0x37/0x1e70 migrate_pages+0x2ca/0xb20 compact_zone+0x19cb/0x3620 kcompactd_do_work+0x2df/0x680 kcompactd+0x1d8/0x6c0 kthread+0x32c/0x3f0 ret_from_fork+0x35/0x40 Link: http://lkml.kernel.org/r/20190320192648.52499-1-cai@lca.pw Fixes: dbe2d4e4f12e ("mm, compaction: round-robin the order while searching the free lists for a target") Signed-off-by: Qian Cai Acked-by: Mel Gorman Cc: Daniel Jordan Cc: Mikhail Gavrilov Cc: Vlastimil Babka Cc: Pavel Tatashin Signed-off-by: Mel Gorman --- mm/compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index b4930bf93c8ac..3319e0872d014 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1370,7 +1370,7 @@ fast_isolate_freepages(struct compact_control *cc) count_compact_events(COMPACTISOLATED, nr_isolated); } else { /* If isolation fails, abort the search */ - order = -1; + order = cc->search_order + 1; page = NULL; } } -- GitLab From 24e516049360eda85cf3fe9903221d43886c2689 Mon Sep 17 00:00:00 2001 From: Neil Leeder Date: Tue, 26 Mar 2019 15:17:50 +0000 Subject: [PATCH 0616/1861] ACPI/IORT: Add support for PMCG Add support for the SMMU Performance Monitor Counter Group information from ACPI. This is in preparation for its use in the SMMUv3 PMU driver. 
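The resource-counting rule the PMCG code encodes, two fixed MMIO pages plus one optional overflow interrupt, reduced to a standalone sketch (the struct here abbreviates the IORT PMCG node fields for illustration):

#include <stdio.h>

struct pmcg_desc {
        unsigned long page0_base, page1_base;
        unsigned int overflow_gsiv;     /* 0 means no overflow interrupt */
};

static int pmcg_count_resources(const struct pmcg_desc *p)
{
        /* Two MMIO pages are always present; the IRQ is optional. */
        return p->overflow_gsiv ? 3 : 2;
}

int main(void)
{
        struct pmcg_desc with_irq = { 0x10000, 0x11000, 40 };
        struct pmcg_desc without   = { 0x20000, 0x21000, 0 };

        printf("with irq: %d resources\n", pmcg_count_resources(&with_irq));
        printf("without:  %d resources\n", pmcg_count_resources(&without));
        return 0;
}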
Signed-off-by: Neil Leeder Signed-off-by: Hanjun Guo Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 117 ++++++++++++++++++++++++++++++-------- include/linux/acpi_iort.h | 7 +++ 2 files changed, 100 insertions(+), 24 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e48894e002ba8..e2c9b26bbee66 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -356,7 +356,8 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT || node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX || - node->type == ACPI_IORT_NODE_SMMU_V3) { + node->type == ACPI_IORT_NODE_SMMU_V3 || + node->type == ACPI_IORT_NODE_PMCG) { *id_out = map->output_base; return parent; } @@ -394,6 +395,8 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node) } return smmu->id_mapping_index; + case ACPI_IORT_NODE_PMCG: + return 0; default: return -EINVAL; } @@ -1218,14 +1221,23 @@ static void __init arm_smmu_v3_init_resources(struct resource *res, } } -static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) +static void __init arm_smmu_v3_dma_configure(struct device *dev, + struct acpi_iort_node *node) { struct acpi_iort_smmu_v3 *smmu; + enum dev_dma_attr attr; /* Retrieve SMMUv3 specific data */ smmu = (struct acpi_iort_smmu_v3 *)node->node_data; - return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE; + attr = (smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) ? + DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; + + /* We expect the dma masks to be equivalent for all SMMUv3 set-ups */ + dev->dma_mask = &dev->coherent_dma_mask; + + /* Configure DMA for the page table walker */ + acpi_dma_configure(dev, attr); } #if defined(CONFIG_ACPI_NUMA) @@ -1301,30 +1313,82 @@ static void __init arm_smmu_init_resources(struct resource *res, } } -static bool __init arm_smmu_is_coherent(struct acpi_iort_node *node) +static void __init arm_smmu_dma_configure(struct device *dev, + struct acpi_iort_node *node) { struct acpi_iort_smmu *smmu; + enum dev_dma_attr attr; /* Retrieve SMMU specific data */ smmu = (struct acpi_iort_smmu *)node->node_data; - return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK; + attr = (smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK) ? + DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; + + /* We expect the dma masks to be equivalent for SMMU set-ups */ + dev->dma_mask = &dev->coherent_dma_mask; + + /* Configure DMA for the page table walker */ + acpi_dma_configure(dev, attr); +} + +static int __init arm_smmu_v3_pmcg_count_resources(struct acpi_iort_node *node) +{ + struct acpi_iort_pmcg *pmcg; + + /* Retrieve PMCG specific data */ + pmcg = (struct acpi_iort_pmcg *)node->node_data; + + /* + * There are always 2 memory resources. + * If the overflow_gsiv is present then add that for a total of 3. + */ + return pmcg->overflow_gsiv ? 
3 : 2; +} + +static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, + struct acpi_iort_node *node) +{ + struct acpi_iort_pmcg *pmcg; + + /* Retrieve PMCG specific data */ + pmcg = (struct acpi_iort_pmcg *)node->node_data; + + res[0].start = pmcg->page0_base_address; + res[0].end = pmcg->page0_base_address + SZ_4K - 1; + res[0].flags = IORESOURCE_MEM; + res[1].start = pmcg->page1_base_address; + res[1].end = pmcg->page1_base_address + SZ_4K - 1; + res[1].flags = IORESOURCE_MEM; + + if (pmcg->overflow_gsiv) + acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow", + ACPI_EDGE_SENSITIVE, &res[2]); +} + +static int __init arm_smmu_v3_pmcg_add_platdata(struct platform_device *pdev) +{ + u32 model = IORT_SMMU_V3_PMCG_GENERIC; + + return platform_device_add_data(pdev, &model, sizeof(model)); } struct iort_dev_config { const char *name; int (*dev_init)(struct acpi_iort_node *node); - bool (*dev_is_coherent)(struct acpi_iort_node *node); + void (*dev_dma_configure)(struct device *dev, + struct acpi_iort_node *node); int (*dev_count_resources)(struct acpi_iort_node *node); void (*dev_init_resources)(struct resource *res, struct acpi_iort_node *node); void (*dev_set_proximity)(struct device *dev, struct acpi_iort_node *node); + int (*dev_add_platdata)(struct platform_device *pdev); }; static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = { .name = "arm-smmu-v3", - .dev_is_coherent = arm_smmu_v3_is_coherent, + .dev_dma_configure = arm_smmu_v3_dma_configure, .dev_count_resources = arm_smmu_v3_count_resources, .dev_init_resources = arm_smmu_v3_init_resources, .dev_set_proximity = arm_smmu_v3_set_proximity, @@ -1332,9 +1396,16 @@ static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = { static const struct iort_dev_config iort_arm_smmu_cfg __initconst = { .name = "arm-smmu", - .dev_is_coherent = arm_smmu_is_coherent, + .dev_dma_configure = arm_smmu_dma_configure, .dev_count_resources = arm_smmu_count_resources, - .dev_init_resources = arm_smmu_init_resources + .dev_init_resources = arm_smmu_init_resources, +}; + +static const struct iort_dev_config iort_arm_smmu_v3_pmcg_cfg __initconst = { + .name = "arm-smmu-v3-pmcg", + .dev_count_resources = arm_smmu_v3_pmcg_count_resources, + .dev_init_resources = arm_smmu_v3_pmcg_init_resources, + .dev_add_platdata = arm_smmu_v3_pmcg_add_platdata, }; static __init const struct iort_dev_config *iort_get_dev_cfg( @@ -1345,6 +1416,8 @@ static __init const struct iort_dev_config *iort_get_dev_cfg( return &iort_arm_smmu_v3_cfg; case ACPI_IORT_NODE_SMMU: return &iort_arm_smmu_cfg; + case ACPI_IORT_NODE_PMCG: + return &iort_arm_smmu_v3_pmcg_cfg; default: return NULL; } @@ -1362,7 +1435,6 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, struct fwnode_handle *fwnode; struct platform_device *pdev; struct resource *r; - enum dev_dma_attr attr; int ret, count; pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO); @@ -1393,19 +1465,19 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, goto dev_put; /* - * Add a copy of IORT node pointer to platform_data to - * be used to retrieve IORT data information. + * Platform devices based on PMCG nodes uses platform_data to + * pass the hardware model info to the driver. For others, add + * a copy of IORT node pointer to platform_data to be used to + * retrieve IORT data information. 
*/ - ret = platform_device_add_data(pdev, &node, sizeof(node)); + if (ops->dev_add_platdata) + ret = ops->dev_add_platdata(pdev); + else + ret = platform_device_add_data(pdev, &node, sizeof(node)); + if (ret) goto dev_put; - /* - * We expect the dma masks to be equivalent for - * all SMMUs set-ups - */ - pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; - fwnode = iort_get_fwnode(node); if (!fwnode) { @@ -1415,11 +1487,8 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, pdev->dev.fwnode = fwnode; - attr = ops->dev_is_coherent && ops->dev_is_coherent(node) ? - DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; - - /* Configure DMA for the page table walker */ - acpi_dma_configure(&pdev->dev, attr); + if (ops->dev_dma_configure) + ops->dev_dma_configure(&pdev->dev, node); iort_set_device_domain(&pdev->dev, node); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 38cd77b39a64a..052ef7b9f985a 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -26,6 +26,13 @@ #define IORT_IRQ_MASK(irq) (irq & 0xffffffffULL) #define IORT_IRQ_TRIGGER_MASK(irq) ((irq >> 32) & 0xffffffffULL) +/* + * PMCG model identifiers for use in smmu pmu driver. Please note + * that this is purely for the use of software and has nothing to + * do with hardware or with IORT specification. + */ +#define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ + int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); void iort_deregister_domain_token(int trans_id); -- GitLab From 8b030a57e35a0efc1a8aa18bb10555bc5066ac40 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Thu, 4 Apr 2019 15:38:38 +0300 Subject: [PATCH 0617/1861] ALSA: xen-front: Do not use stream buffer size before it is set This fixes the regression introduced while moving to Xen shared buffer implementation. Fixes: 58f9d806d16a ("ALSA: xen-front: Use Xen common shared buffer implementation") Reviewed-by: Juergen Gross Signed-off-by: Oleksandr Andrushchenko Cc: # v5.0+ Signed-off-by: Takashi Iwai --- sound/xen/xen_snd_front_alsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/xen/xen_snd_front_alsa.c b/sound/xen/xen_snd_front_alsa.c index a7f413cb704dc..b14ab512c2ce0 100644 --- a/sound/xen/xen_snd_front_alsa.c +++ b/sound/xen/xen_snd_front_alsa.c @@ -441,7 +441,7 @@ static int shbuf_setup_backstore(struct xen_snd_front_pcm_stream_info *stream, { int i; - stream->buffer = alloc_pages_exact(stream->buffer_sz, GFP_KERNEL); + stream->buffer = alloc_pages_exact(buffer_sz, GFP_KERNEL); if (!stream->buffer) return -ENOMEM; -- GitLab From 631b7abacd02b88f4b0795c08b54ad4fc3e7c7c0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:35 -0500 Subject: [PATCH 0618/1861] ptrace: Remove maxargs from task_current_syscall() task_current_syscall() has a single user that passes in 6 for maxargs, which is the maximum arguments that can be used to get system calls from syscall_get_arguments(). Instead of passing in a number of arguments to grab, just get 6 arguments. The args argument even specifies that it's an array of 6 items. This will also allow changing syscall_get_arguments() to not get a variable number of arguments, but always grab 6. Linus also suggested not passing in a bunch of arguments to task_current_syscall() but to instead pass in a pointer to a structure, and just fill the structure. struct seccomp_data has almost all the parameters that is needed except for the stack pointer (sp). 
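The API shape being adopted, one struct out-parameter instead of five pointers, looks like this in standalone C. The field values are faked and the layout is only loosely modeled on the kernel's struct syscall_info, which wraps struct seccomp_data:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct syscall_info_demo {
        uint64_t sp;
        int nr;
        uint64_t args[6];
        uint64_t ip;
};

static int get_blocked_syscall(struct syscall_info_demo *info)
{
        memset(info, 0, sizeof(*info));
        info->nr = 1;                   /* pretend: write(2) */
        info->args[0] = 2;              /* fd = stderr */
        info->sp = 0x7ffc00000000ULL;
        return 0;
}

int main(void)
{
        struct syscall_info_demo info;

        /* One pointer in, a coherent snapshot out; no six-out-parameter
         * call and no maxargs to validate. */
        if (!get_blocked_syscall(&info))
                printf("nr=%d fd=%llu sp=%#llx\n", info.nr,
                       (unsigned long long)info.args[0],
                       (unsigned long long)info.sp);
        return 0;
}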
As seccomp_data is part of uapi, and I'm afraid to change it, a new structure was created "syscall_info", which includes seccomp_data and adds the "sp" field. Link: http://lkml.kernel.org/r/20161107213233.466776454@goodmis.org Cc: Andy Lutomirski Cc: Alexey Dobriyan Cc: Oleg Nesterov Cc: Kees Cook Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- fs/proc/base.c | 17 +++++++------ include/linux/ptrace.h | 11 +++++--- lib/syscall.c | 57 ++++++++++++++++++------------------------ 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index ddef482f13340..6a803a0b75df4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -616,24 +616,25 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - long nr; - unsigned long args[6], sp, pc; + struct syscall_info info; + u64 *args = &info.data.args[0]; int res; res = lock_trace(task); if (res) return res; - if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + if (task_current_syscall(task, &info)) seq_puts(m, "running\n"); - else if (nr < 0) - seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + else if (info.data.nr < 0) + seq_printf(m, "%d 0x%llx 0x%llx\n", + info.data.nr, info.sp, info.data.instruction_pointer); else seq_printf(m, - "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", - nr, + "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", + info.data.nr, args[0], args[1], args[2], args[3], args[4], args[5], - sp, pc); + info.sp, info.data.instruction_pointer); unlock_trace(task); return 0; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index edb9b040c94c3..d5084ebd9f030 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -9,6 +9,13 @@ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. */ #include +#include + +/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */ +struct syscall_info { + __u64 sp; + struct seccomp_data data; +}; extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); @@ -407,9 +414,7 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif -extern int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc); +extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); #endif diff --git a/lib/syscall.c b/lib/syscall.c index 1a7077f20eae4..e8467e17b9a20 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -5,16 +5,14 @@ #include #include -static int collect_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +static int collect_syscall(struct task_struct *target, struct syscall_info *info) { struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. 
*/ - *sp = *pc = 0; - *callno = -1; + memset(info, 0, sizeof(*info)); + info->data.nr = -1; return 0; } @@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno, return -EAGAIN; } - *sp = user_stack_pointer(regs); - *pc = instruction_pointer(regs); + info->sp = user_stack_pointer(regs); + info->data.instruction_pointer = instruction_pointer(regs); - *callno = syscall_get_nr(target, regs); - if (*callno != -1L && maxargs > 0) - syscall_get_arguments(target, regs, 0, maxargs, args); + info->data.nr = syscall_get_nr(target, regs); + if (info->data.nr != -1L) + syscall_get_arguments(target, regs, 0, 6, + (unsigned long *)&info->data.args[0]); put_task_stack(target); return 0; @@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno, /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine - * @callno: filled with system call number or -1 - * @args: filled with @maxargs system call arguments - * @maxargs: number of elements in @args to fill - * @sp: filled with user stack pointer - * @pc: filled with user PC + * @info: structure with the following fields: + * .sp - filled with user stack pointer + * .data.nr - filled with system call number or -1 + * .data.args - filled with @maxargs system call arguments + * .data.instruction_pointer - filled with user PC * - * If @target is blocked in a system call, returns zero with *@callno - * set to the the call's number and @args filled in with its arguments. - * Registers not used for system call arguments may not be available and - * it is not kosher to use &struct user_regset calls while the system + * If @target is blocked in a system call, returns zero with @info.data.nr + * set to the the call's number and @info.data.args filled in with its + * arguments. Registers not used for system call arguments may not be available + * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, - * returns zero with *@callno set to -1 and does not fill in @args. - * If so, it's now safe to examine @target using &struct user_regset - * get() calls as long as we're sure @target won't return to user mode. + * returns zero with *@info.data.nr set to -1 and does not fill in + * @info.data.args. If so, it's now safe to examine @target using + * &struct user_regset get() calls as long as we're sure @target won't return + * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. - * - * Returns -%EINVAL if @maxargs is too large (maximum is six). 
*/ -int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +int task_current_syscall(struct task_struct *target, struct syscall_info *info) { long state; unsigned long ncsw; - if (unlikely(maxargs > 6)) - return -EINVAL; - if (target == current) - return collect_syscall(target, callno, args, maxargs, sp, pc); + return collect_syscall(target, info); state = target->state; if (unlikely(!state)) @@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno, ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || - unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || + unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; -- GitLab From d08e411397cb6fcb3d3fb075c27a41975c99e88f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:36 -0500 Subject: [PATCH 0619/1861] tracing/syscalls: Pass in hardcoded 6 into syscall_get_arguments() The only user that calls syscall_get_arguments() with a variable and not a hard coded '6' is ftrace_syscall_enter(). syscall_get_arguments() can be optimized by removing a variable input, and always grabbing 6 arguments regardless of what the system call actually uses. Change ftrace_syscall_enter() to pass the 6 args into a local stack array and copy the necessary arguments into the trace event as needed. This is needed to remove two parameters from syscall_get_arguments(). Link: http://lkml.kernel.org/r/20161107213233.627583542@goodmis.org Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_syscalls.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f93a56d2db275..e9f5bbbad6d94 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) struct ring_buffer_event *event; struct ring_buffer *buffer; unsigned long irq_flags; + unsigned long args[6]; int pc; int syscall_nr; int size; @@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry = ring_buffer_event_data(event); entry->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); + syscall_get_arguments(current, regs, 0, 6, args); + memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); @@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; + unsigned long args[6]; bool valid_prog_array; int syscall_nr; int rctx; @@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) return; rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, sys_data->nb_args, - (unsigned long *)&rec->args); + syscall_get_arguments(current, regs, 0, 6, args); + memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || -- GitLab From 5eed7898626bedd6405421550c0c6e8ab9591bb2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 3 Apr 2019 13:53:18 -0700 Subject: [PATCH 0620/1861] flow_dissector: rst'ify documentation Rename
bpf_flow_dissector.txt to bpf_flow_dissector.rst and fix formatting. Also, link it from the Documentation/networking/index.rst. Tested with 'make htmldocs' to make sure it looks reasonable. Fixes: ae82899bbe92 ("flow_dissector: document BPF flow dissector environment") Signed-off-by: Stanislav Fomichev Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- .../networking/bpf_flow_dissector.rst | 126 ++++++++++++++++++ .../networking/bpf_flow_dissector.txt | 115 ---------------- Documentation/networking/index.rst | 1 + 3 files changed, 127 insertions(+), 115 deletions(-) create mode 100644 Documentation/networking/bpf_flow_dissector.rst delete mode 100644 Documentation/networking/bpf_flow_dissector.txt diff --git a/Documentation/networking/bpf_flow_dissector.rst b/Documentation/networking/bpf_flow_dissector.rst new file mode 100644 index 0000000000000..b375ae2ec2c4f --- /dev/null +++ b/Documentation/networking/bpf_flow_dissector.rst @@ -0,0 +1,126 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +BPF Flow Dissector +================== + +Overview +======== + +Flow dissector is a routine that parses metadata out of the packets. It's +used in the various places in the networking subsystem (RFS, flow hash, etc). + +BPF flow dissector is an attempt to reimplement C-based flow dissector logic +in BPF to gain all the benefits of BPF verifier (namely, limits on the +number of instructions and tail calls). + +API +=== + +BPF flow dissector programs operate on an ``__sk_buff``. However, only the +limited set of fields is allowed: ``data``, ``data_end`` and ``flow_keys``. +``flow_keys`` is ``struct bpf_flow_keys`` and contains flow dissector input +and output arguments. + +The inputs are: + * ``nhoff`` - initial offset of the networking header + * ``thoff`` - initial offset of the transport header, initialized to nhoff + * ``n_proto`` - L3 protocol type, parsed out of L2 header + +Flow dissector BPF program should fill out the rest of the ``struct +bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be +also adjusted accordingly. + +The return code of the BPF program is either BPF_OK to indicate successful +dissection, or BPF_DROP to indicate parsing error. + +__sk_buff->data +=============== + +In the VLAN-less case, this is what the initial state of the BPF flow +dissector looks like:: + + +------+------+------------+-----------+ + | DMAC | SMAC | ETHER_TYPE | L3_HEADER | + +------+------+------------+-----------+ + ^ + | + +-- flow dissector starts here + + +.. code:: c + + skb->data + flow_keys->nhoff point to the first byte of L3_HEADER + flow_keys->thoff = nhoff + flow_keys->n_proto = ETHER_TYPE + +In case of VLAN, flow dissector can be called with the two different states. + +Pre-VLAN parsing:: + + +------+------+------+-----+-----------+-----------+ + | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | + +------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +.. code:: c + + skb->data + flow_keys->nhoff point the to first byte of TCI + flow_keys->thoff = nhoff + flow_keys->n_proto = TPID + +Please note that TPID can be 802.1AD and, hence, BPF program would +have to parse VLAN information twice for double tagged packets. + + +Post-VLAN parsing:: + + +------+------+------+-----+-----------+-----------+ + | DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | + +------+------+------+-----+-----------+-----------+ + ^ + | + +-- flow dissector starts here + +.. 
code:: c + + skb->data + flow_keys->nhoff point the to first byte of L3_HEADER + flow_keys->thoff = nhoff + flow_keys->n_proto = ETHER_TYPE + +In this case VLAN information has been processed before the flow dissector +and BPF flow dissector is not required to handle it. + + +The takeaway here is as follows: BPF flow dissector program can be called with +the optional VLAN header and should gracefully handle both cases: when single +or double VLAN is present and when it is not present. The same program +can be called for both cases and would have to be written carefully to +handle both cases. + + +Reference Implementation +======================== + +See ``tools/testing/selftests/bpf/progs/bpf_flow.c`` for the reference +implementation and ``tools/testing/selftests/bpf/flow_dissector_load.[hc]`` +for the loader. bpftool can be used to load BPF flow dissector program as well. + +The reference implementation is organized as follows: + * ``jmp_table`` map that contains sub-programs for each supported L3 protocol + * ``_dissect`` routine - entry point; it does input ``n_proto`` parsing and + does ``bpf_tail_call`` to the appropriate L3 handler + +Since BPF at this point doesn't support looping (or any jumping back), +jmp_table is used instead to handle multiple levels of encapsulation (and +IPv6 options). + + +Current Limitations +=================== +BPF flow dissector doesn't support exporting all the metadata that in-kernel +C-based implementation can export. Notable example is single VLAN (802.1Q) +and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys`` +for a set of information that's currently can be exported from the BPF context. diff --git a/Documentation/networking/bpf_flow_dissector.txt b/Documentation/networking/bpf_flow_dissector.txt deleted file mode 100644 index 70f66a2d57e7b..0000000000000 --- a/Documentation/networking/bpf_flow_dissector.txt +++ /dev/null @@ -1,115 +0,0 @@ -================== -BPF Flow Dissector -================== - -Overview -======== - -Flow dissector is a routine that parses metadata out of the packets. It's -used in the various places in the networking subsystem (RFS, flow hash, etc). - -BPF flow dissector is an attempt to reimplement C-based flow dissector logic -in BPF to gain all the benefits of BPF verifier (namely, limits on the -number of instructions and tail calls). - -API -=== - -BPF flow dissector programs operate on an __sk_buff. However, only the -limited set of fields is allowed: data, data_end and flow_keys. flow_keys -is 'struct bpf_flow_keys' and contains flow dissector input and -output arguments. - -The inputs are: - * nhoff - initial offset of the networking header - * thoff - initial offset of the transport header, initialized to nhoff - * n_proto - L3 protocol type, parsed out of L2 header - -Flow dissector BPF program should fill out the rest of the 'struct -bpf_flow_keys' fields. Input arguments nhoff/thoff/n_proto should be also -adjusted accordingly. - -The return code of the BPF program is either BPF_OK to indicate successful -dissection, or BPF_DROP to indicate parsing error. - -__sk_buff->data -=============== - -In the VLAN-less case, this is what the initial state of the BPF flow -dissector looks like: -+------+------+------------+-----------+ -| DMAC | SMAC | ETHER_TYPE | L3_HEADER | -+------+------+------------+-----------+ - ^ - | - +-- flow dissector starts here - -skb->data + flow_keys->nhoff point to the first byte of L3_HEADER. 
-flow_keys->thoff = nhoff -flow_keys->n_proto = ETHER_TYPE - - -In case of VLAN, flow dissector can be called with the two different states. - -Pre-VLAN parsing: -+------+------+------+-----+-----------+-----------+ -| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | -+------+------+------+-----+-----------+-----------+ - ^ - | - +-- flow dissector starts here - -skb->data + flow_keys->nhoff point the to first byte of TCI. -flow_keys->thoff = nhoff -flow_keys->n_proto = TPID - -Please note that TPID can be 802.1AD and, hence, BPF program would -have to parse VLAN information twice for double tagged packets. - - -Post-VLAN parsing: -+------+------+------+-----+-----------+-----------+ -| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER | -+------+------+------+-----+-----------+-----------+ - ^ - | - +-- flow dissector starts here - -skb->data + flow_keys->nhoff point the to first byte of L3_HEADER. -flow_keys->thoff = nhoff -flow_keys->n_proto = ETHER_TYPE - -In this case VLAN information has been processed before the flow dissector -and BPF flow dissector is not required to handle it. - - -The takeaway here is as follows: BPF flow dissector program can be called with -the optional VLAN header and should gracefully handle both cases: when single -or double VLAN is present and when it is not present. The same program -can be called for both cases and would have to be written carefully to -handle both cases. - - -Reference Implementation -======================== - -See tools/testing/selftests/bpf/progs/bpf_flow.c for the reference -implementation and tools/testing/selftests/bpf/flow_dissector_load.[hc] for -the loader. bpftool can be used to load BPF flow dissector program as well. - -The reference implementation is organized as follows: -* jmp_table map that contains sub-programs for each supported L3 protocol -* _dissect routine - entry point; it does input n_proto parsing and does - bpf_tail_call to the appropriate L3 handler - -Since BPF at this point doesn't support looping (or any jumping back), -jmp_table is used instead to handle multiple levels of encapsulation (and -IPv6 options). - - -Current Limitations -=================== -BPF flow dissector doesn't support exporting all the metadata that in-kernel -C-based implementation can export. Notable example is single VLAN (802.1Q) -and double VLAN (802.1AD) tags. Please refer to the 'struct bpf_flow_keys' -for a set of information that's currently can be exported from the BPF context. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 5449149be496f..984e68f9e0269 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -9,6 +9,7 @@ Contents: netdev-FAQ af_xdp batman-adv + bpf_flow_dissector can can_ucan_protocol device_drivers/freescale/dpaa2/index -- GitLab From 10a16997db3d99fc02c026cf2c6e6c670acafab0 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 29 Mar 2019 20:12:21 +0300 Subject: [PATCH 0621/1861] riscv: Fix syscall_get_arguments() and syscall_set_arguments() RISC-V syscall arguments are located in orig_a0,a1..a5 fields of struct pt_regs. Due to an off-by-one bug and a bug in pointer arithmetic syscall_get_arguments() was reading s3..s7 fields instead of a1..a5. Likewise, syscall_set_arguments() was writing s3..s7 fields instead of a1..a5. 
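To illustrate the pointer arithmetic (a sketch, not part of the patch;
it assumes 64-bit registers, i.e. sizeof(regs->a1) == 8):

    unsigned long *base = &regs->a1;    /* a1..a5 live here */

    /* old code, after the i == 0 case has bumped i to 1: */
    memcpy(args, base + 1 * sizeof(regs->a1), n * sizeof(args[0]));
    /* base + 8 advances eight whole registers: &regs->s3 */

    /* fixed code, with i decremented back to 0 instead: */
    memcpy(args, base + 0, n * sizeof(args[0]));
    /* pointer arithmetic already scales by the element size,
     * so this starts at &regs->a1 as intended */
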
Link: http://lkml.kernel.org/r/20190329171221.GA32456@altlinux.org Fixes: e2c0cdfba7f69 ("RISC-V: User-facing API") Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Cc: stable@vger.kernel.org # v4.15+ Acked-by: Palmer Dabbelt Signed-off-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/riscv/include/asm/syscall.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index bba3da6ef1572..6ea9e18042331 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -79,10 +79,11 @@ static inline void syscall_get_arguments(struct task_struct *task, if (i == 0) { args[0] = regs->orig_a0; args++; - i++; n--; + } else { + i--; } - memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0])); + memcpy(args, ®s->a1 + i, n * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, @@ -94,10 +95,11 @@ static inline void syscall_set_arguments(struct task_struct *task, if (i == 0) { regs->orig_a0 = args[0]; args++; - i++; n--; - } - memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); + } else { + i--; + } + memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); } static inline int syscall_get_arch(void) -- GitLab From ed3bb007021b9bddb90afae28a19f08ed8890add Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 29 Mar 2019 20:12:30 +0300 Subject: [PATCH 0622/1861] csky: Fix syscall_get_arguments() and syscall_set_arguments() C-SKY syscall arguments are located in orig_a0,a1,a2,a3,regs[0],regs[1] fields of struct pt_regs. Due to an off-by-one bug and a bug in pointer arithmetic syscall_get_arguments() was reading orig_a0,regs[1..5] fields instead. Likewise, syscall_set_arguments() was writing orig_a0,regs[1..5] fields instead. Link: http://lkml.kernel.org/r/20190329171230.GB32456@altlinux.org Fixes: 4859bfca11c7d ("csky: System Call") Cc: Ingo Molnar Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: stable@vger.kernel.org # v4.20+ Tested-by: Guo Ren Acked-by: Guo Ren Signed-off-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/csky/include/asm/syscall.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index d637445737b78..9a9cd81e66c11 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -49,10 +49,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, if (i == 0) { args[0] = regs->orig_a0; args++; - i++; n--; + } else { + i--; } - memcpy(args, ®s->a1 + i * sizeof(regs->a1), n * sizeof(args[0])); + memcpy(args, ®s->a1 + i, n * sizeof(args[0])); } static inline void @@ -63,10 +64,11 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, if (i == 0) { regs->orig_a0 = args[0]; args++; - i++; n--; + } else { + i--; } - memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); + memcpy(®s->a1 + i, args, n * sizeof(regs->a1)); } static inline int -- GitLab From bcc816dfe51ab86ca94663c7b225f2d6eb0fddb9 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 4 Apr 2019 10:57:44 +0800 Subject: [PATCH 0623/1861] blk-mq: do not reset plug->rq_count before the list is sorted We would never be able to sort the list if we first reset plug->rq_count which is used in conditional check later. 
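In other words (an illustrative sketch of the old ordering, matching the
hunk below):

    list_splice_init(&plug->mq_list, &list);
    plug->rq_count = 0;                     /* reset too early... */

    if (plug->rq_count > 2 && plug->multiple_queues)
        list_sort(NULL, &list, plug_rq_cmp);    /* ...never runs */

Moving the reset below the check restores the intended "sort when more
than two requests are plugged" behaviour.
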
Fixes: ce5b009cff19 ("block: improve logic around when to sort a plug list") Reviewed-by: Ming Lei Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 22074a1e37cd4..ac61bcc67a89e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1711,11 +1711,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) unsigned int depth; list_splice_init(&plug->mq_list, &list); - plug->rq_count = 0; if (plug->rq_count > 2 && plug->multiple_queues) list_sort(NULL, &list, plug_rq_cmp); + plug->rq_count = 0; + this_q = NULL; this_hctx = NULL; this_ctx = NULL; -- GitLab From 1c41860864c8ae0387ef7d44f0000e99cbb2e06d Mon Sep 17 00:00:00 2001 From: Wei Li Date: Mon, 1 Apr 2019 11:55:57 +0800 Subject: [PATCH 0624/1861] arm64: fix wrong check of on_sdei_stack in nmi context When doing unwind_frame() in the context of pseudo nmi (need enable CONFIG_ARM64_PSEUDO_NMI), reaching the bottom of the stack (fp == 0, pc != 0), function on_sdei_stack() will return true while the sdei acpi table is not inited in fact. This will cause a "NULL pointer dereference" oops when going on. Reviewed-by: Julien Thierry Signed-off-by: Wei Li Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sdei.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 5ba4465e44f09..ea94cf8f9dc6d 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -94,6 +94,9 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; @@ -111,6 +114,9 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; + if (!low) + return false; + if (sp < low || sp >= high) return false; -- GitLab From e7ad88553aa1d48e950ca9a4934d246c1bee4be4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Apr 2019 12:30:32 -0500 Subject: [PATCH 0625/1861] drm/amdkfd: Add picasso pci id Picasso is a new raven variant. 
Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8be9677c0c07d..cf9a49f49d3a4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -320,6 +320,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */ { 0x15DD, &raven_device_info }, /* Raven */ + { 0x15D8, &raven_device_info }, /* Raven */ #endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ -- GitLab From d4162c61e253177936fcfe6c29f7b224d9a1efb8 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 1 Apr 2019 16:09:34 -0400 Subject: [PATCH 0626/1861] drm/amdgpu: Adjust IB test timeout for XGMI configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On XGMI configuration the ib test may take longer to finish Signed-off-by: shaoyunl Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 0b8ef2d27d6b2..fe393a46f8811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -35,6 +35,7 @@ #include "amdgpu_trace.h" #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) +#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000) /* * IB @@ -344,6 +345,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) * cost waiting for it coming back under RUNTIME only */ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; + } else if (adev->gmc.xgmi.hive_id) { + tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT; } for (i = 0; i < adev->num_rings; ++i) { -- GitLab From 1712fb1a2f6829150032ac76eb0e39b82a549cfb Mon Sep 17 00:00:00 2001 From: wentalou Date: Tue, 2 Apr 2019 17:13:05 +0800 Subject: [PATCH 0627/1861] drm/amdgpu: amdgpu_device_recover_vram always failed if only one node in shadow_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_bo_restore_shadow would assign zero to r if succeeded. r would remain zero if there is only one node in shadow_list. current code would always return failure when r <= 0. restart the timeout for each wait was a rather problematic bug as well. The value of tmo SHOULD be changed, otherwise we wait tmo jiffies on each loop. 
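dma_fence_wait_timeout() returns the remaining jiffies on success, 0 on
timeout and a negative error code on failure, so feeding its return
value back into tmo turns the timeout into a shared budget across the
whole loop (a sketch of the pattern used in the hunk below):

    tmo = dma_fence_wait_timeout(fence, false, tmo);
    if (tmo == 0)           /* budget exhausted */
        r = -ETIMEDOUT;
    else if (tmo < 0)       /* the wait itself failed */
        r = tmo;
    /* tmo > 0: remaining budget is carried to the next fence */
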
Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ac0d646a7b743..5d8b30fd45345 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3173,11 +3173,16 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) break; if (fence) { - r = dma_fence_wait_timeout(fence, false, tmo); + tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); fence = next; - if (r <= 0) + if (tmo == 0) { + r = -ETIMEDOUT; break; + } else if (tmo < 0) { + r = tmo; + break; + } } else { fence = next; } @@ -3188,8 +3193,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); - if (r <= 0 || tmo <= 0) { - DRM_ERROR("recover vram bo from shadow failed\n"); + if (r < 0 || tmo <= 0) { + DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); return -EIO; } -- GitLab From c1cefe115d1cdc460014483319d440b2f0d07c68 Mon Sep 17 00:00:00 2001 From: tiancyin Date: Mon, 1 Apr 2019 10:15:31 +0800 Subject: [PATCH 0628/1861] drm/amd/display: fix cursor black issue [Why] the member sdr_white_level of struct dc_cursor_attributes was not initialized, then the random value result that dcn10_set_cursor_sdr_white_level() set error hw_scale value 0x20D9(normal value is 0x3c00), this cause the black cursor issue. [how] just initilize the obj of struct dc_cursor_attributes to zero to avoid the random value. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Tianci Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 81127f7d6ed19..3082b55b1e774 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4533,6 +4533,7 @@ static void handle_cursor_update(struct drm_plane *plane, amdgpu_crtc->cursor_width = plane->state->crtc_w; amdgpu_crtc->cursor_height = plane->state->crtc_h; + memset(&attributes, 0, sizeof(attributes)); attributes.address.high_part = upper_32_bits(address); attributes.address.low_part = lower_32_bits(address); attributes.width = plane->state->crtc_w; -- GitLab From 7d839b4b9e00645e49345d6ce5dfa8edf53c1a21 Mon Sep 17 00:00:00 2001 From: Neil Leeder Date: Tue, 26 Mar 2019 15:17:51 +0000 Subject: [PATCH 0629/1861] perf/smmuv3: Add arm64 smmuv3 pmu driver Adds a new driver to support the SMMUv3 PMU and add it into the perf events framework. Each SMMU node may have multiple PMUs associated with it, each of which may support different events. SMMUv3 PMCG devices are named as smmuv3_pmcg_ where is the physical page address of the SMMU PMCG wrapped to 4K boundary. For example, the PMCG at 0xff88840000 is named smmuv3_pmcg_ff88840 Filtering by stream id is done by specifying filtering parameters with the event. 
options are: filter_enable - 0 = no filtering, 1 = filtering enabled filter_span - 0 = exact match, 1 = pattern match filter_stream_id - pattern to filter against Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1, filter_span=1,filter_stream_id=0x42/ -a netperf Applies filter pattern 0x42 to transaction events, which means events matching stream ids 0x42 & 0x43 are counted as only upper StreamID bits are required to match the given filter. Further filtering information is available in the SMMU documentation. SMMU events are not attributable to a CPU, so task mode and sampling are not supported. Signed-off-by: Neil Leeder Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy [will: fold in review feedback from Robin] [will: rewrite Kconfig text and allow building as a module] Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 9 + drivers/perf/Makefile | 1 + drivers/perf/arm_smmuv3_pmu.c | 773 ++++++++++++++++++++++++++++++++++ 3 files changed, 783 insertions(+) create mode 100644 drivers/perf/arm_smmuv3_pmu.c diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index af9bc178495d7..a94e586a58b2f 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -52,6 +52,15 @@ config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y +config ARM_SMMU_V3_PMU + tristate "ARM SMMUv3 Performance Monitors Extension" + depends on ARM64 && ACPI && ARM_SMMU_V3 + help + Provides support for the ARM SMMUv3 Performance Monitor Counter + Groups (PMCG), which provide monitoring of transactions passing + through the SMMU and allow the resulting information to be filtered + based on the Stream ID of the corresponding master. + config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" depends on ARM64 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 909f27fd9db35..30489941f3d6c 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ARM_CCN) += arm-ccn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o +obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c new file mode 100644 index 0000000000000..a6d2e3ce94dfb --- /dev/null +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -0,0 +1,773 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * This driver adds support for perf events to use the Performance + * Monitor Counter Groups (PMCG) associated with an SMMUv3 node + * to monitor that node. + * + * SMMUv3 PMCG devices are named as smmuv3_pmcg_ where + * is the physical page address of the SMMU PMCG wrapped + * to 4K boundary. For example, the PMCG at 0xff88840000 is named + * smmuv3_pmcg_ff88840 + * + * Filtering by stream id is done by specifying filtering parameters + * with the event. options are: + * filter_enable - 0 = no filtering, 1 = filtering enabled + * filter_span - 0 = exact match, 1 = pattern match + * filter_stream_id - pattern to filter against + * + * To match a partial StreamID where the X most-significant bits must match + * but the Y least-significant bits might differ, STREAMID is programmed + * with a value that contains: + * STREAMID[Y - 1] == 0. + * STREAMID[Y - 2:0] == 1 (where Y > 1). 
+ * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards) + * contain a value to match from the corresponding bits of event StreamID. + * + * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1, + * filter_span=1,filter_stream_id=0x42/ -a netperf + * Applies filter pattern 0x42 to transaction events, which means events + * matching stream ids 0x42 and 0x43 are counted. Further filtering + * information is available in the SMMU documentation. + * + * SMMU events are not attributable to a CPU, so task mode and sampling + * are not supported. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SMMU_PMCG_EVCNTR0 0x0 +#define SMMU_PMCG_EVCNTR(n, stride) (SMMU_PMCG_EVCNTR0 + (n) * (stride)) +#define SMMU_PMCG_EVTYPER0 0x400 +#define SMMU_PMCG_EVTYPER(n) (SMMU_PMCG_EVTYPER0 + (n) * 4) +#define SMMU_PMCG_SID_SPAN_SHIFT 29 +#define SMMU_PMCG_SMR0 0xA00 +#define SMMU_PMCG_SMR(n) (SMMU_PMCG_SMR0 + (n) * 4) +#define SMMU_PMCG_CNTENSET0 0xC00 +#define SMMU_PMCG_CNTENCLR0 0xC20 +#define SMMU_PMCG_INTENSET0 0xC40 +#define SMMU_PMCG_INTENCLR0 0xC60 +#define SMMU_PMCG_OVSCLR0 0xC80 +#define SMMU_PMCG_OVSSET0 0xCC0 +#define SMMU_PMCG_CFGR 0xE00 +#define SMMU_PMCG_CFGR_SID_FILTER_TYPE BIT(23) +#define SMMU_PMCG_CFGR_RELOC_CTRS BIT(20) +#define SMMU_PMCG_CFGR_SIZE GENMASK(13, 8) +#define SMMU_PMCG_CFGR_NCTR GENMASK(5, 0) +#define SMMU_PMCG_CR 0xE04 +#define SMMU_PMCG_CR_ENABLE BIT(0) +#define SMMU_PMCG_CEID0 0xE20 +#define SMMU_PMCG_CEID1 0xE28 +#define SMMU_PMCG_IRQ_CTRL 0xE50 +#define SMMU_PMCG_IRQ_CTRL_IRQEN BIT(0) +#define SMMU_PMCG_IRQ_CFG0 0xE58 + +#define SMMU_PMCG_DEFAULT_FILTER_SPAN 1 +#define SMMU_PMCG_DEFAULT_FILTER_SID GENMASK(31, 0) + +#define SMMU_PMCG_MAX_COUNTERS 64 +#define SMMU_PMCG_ARCH_MAX_EVENTS 128 + +#define SMMU_PMCG_PA_SHIFT 12 + +static int cpuhp_state_num; + +struct smmu_pmu { + struct hlist_node node; + struct perf_event *events[SMMU_PMCG_MAX_COUNTERS]; + DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS); + DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS); + unsigned int irq; + unsigned int on_cpu; + struct pmu pmu; + unsigned int num_counters; + struct device *dev; + void __iomem *reg_base; + void __iomem *reloc_base; + u64 counter_mask; + bool global_filter; + u32 global_filter_span; + u32 global_filter_sid; +}; + +#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu)) + +#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \ + static inline u32 get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } \ + +SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15); +SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31); +SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32); +SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33); + +static inline void smmu_pmu_enable(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + + writel(SMMU_PMCG_IRQ_CTRL_IRQEN, + smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); + writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR); +} + +static inline void smmu_pmu_disable(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + + writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR); + writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); +} + +static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu, + u32 idx, u64 value) 
+{ + if (smmu_pmu->counter_mask & BIT(32)) + writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8)); + else + writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4)); +} + +static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx) +{ + u64 value; + + if (smmu_pmu->counter_mask & BIT(32)) + value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8)); + else + value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4)); + + return value; +} + +static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0); +} + +static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0); +} + +static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0); +} + +static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu, + u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0); +} + +static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx, + u32 val) +{ + writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx)); +} + +static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val) +{ + writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx)); +} + +static void smmu_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + u64 delta, prev, now; + u32 idx = hwc->idx; + + do { + prev = local64_read(&hwc->prev_count); + now = smmu_pmu_counter_get_value(smmu_pmu, idx); + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + /* handle overflow. */ + delta = now - prev; + delta &= smmu_pmu->counter_mask; + + local64_add(delta, &event->count); +} + +static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u64 new; + + /* + * We limit the max period to half the max counter value of the counter + * size, so that even in the case of extreme interrupt latency the + * counter will (hopefully) not wrap past its initial value. + */ + new = smmu_pmu->counter_mask >> 1; + + local64_set(&hwc->prev_count, new); + smmu_pmu_counter_set_value(smmu_pmu, idx, new); +} + +static void smmu_pmu_set_event_filter(struct perf_event *event, + int idx, u32 span, u32 sid) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + u32 evtyper; + + evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT; + smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper); + smmu_pmu_set_smr(smmu_pmu, idx, sid); +} + +static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, + struct perf_event *event, int idx) +{ + u32 span, sid; + unsigned int num_ctrs = smmu_pmu->num_counters; + bool filter_en = !!get_filter_enable(event); + + span = filter_en ? get_filter_span(event) : + SMMU_PMCG_DEFAULT_FILTER_SPAN; + sid = filter_en ? 
get_filter_stream_id(event) : + SMMU_PMCG_DEFAULT_FILTER_SID; + + /* Support individual filter settings */ + if (!smmu_pmu->global_filter) { + smmu_pmu_set_event_filter(event, idx, span, sid); + return 0; + } + + /* Requested settings same as current global settings*/ + if (span == smmu_pmu->global_filter_span && + sid == smmu_pmu->global_filter_sid) + return 0; + + if (!bitmap_empty(smmu_pmu->used_counters, num_ctrs)) + return -EAGAIN; + + smmu_pmu_set_event_filter(event, 0, span, sid); + smmu_pmu->global_filter_span = span; + smmu_pmu->global_filter_sid = sid; + return 0; +} + +static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu, + struct perf_event *event) +{ + int idx, err; + unsigned int num_ctrs = smmu_pmu->num_counters; + + idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs); + if (idx == num_ctrs) + /* The counters are all in use. */ + return -EAGAIN; + + err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx); + if (err) + return err; + + set_bit(idx, smmu_pmu->used_counters); + + return idx; +} + +/* + * Implementation of abstract pmu functionality required by + * the core perf events code. + */ + +static int smmu_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + struct device *dev = smmu_pmu->dev; + struct perf_event *sibling; + u16 event_id; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (hwc->sample_period) { + dev_dbg(dev, "Sampling not supported\n"); + return -EOPNOTSUPP; + } + + if (event->cpu < 0) { + dev_dbg(dev, "Per-task mode not supported\n"); + return -EOPNOTSUPP; + } + + /* Verify specified event is supported on this PMU */ + event_id = get_event(event); + if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS && + (!test_bit(event_id, smmu_pmu->supported_events))) { + dev_dbg(dev, "Invalid event %d for this PMU\n", event_id); + return -EINVAL; + } + + /* Don't allow groups with mixed PMUs, except for s/w events */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) { + dev_dbg(dev, "Can't create mixed PMU group\n"); + return -EINVAL; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) { + dev_dbg(dev, "Can't create mixed PMU group\n"); + return -EINVAL; + } + } + + hwc->idx = -1; + + /* + * Ensure all events are on the same cpu so all events are in the + * same cpu context, to avoid races on pmu_enable etc. 
+ */ + event->cpu = smmu_pmu->on_cpu; + + return 0; +} + +static void smmu_pmu_event_start(struct perf_event *event, int flags) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + hwc->state = 0; + + smmu_pmu_set_period(smmu_pmu, hwc); + + smmu_pmu_counter_enable(smmu_pmu, idx); +} + +static void smmu_pmu_event_stop(struct perf_event *event, int flags) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (hwc->state & PERF_HES_STOPPED) + return; + + smmu_pmu_counter_disable(smmu_pmu, idx); + /* As the counter gets updated on _start, ignore PERF_EF_UPDATE */ + smmu_pmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int smmu_pmu_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + + idx = smmu_pmu_get_event_idx(smmu_pmu, event); + if (idx < 0) + return idx; + + hwc->idx = idx; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + smmu_pmu->events[idx] = event; + local64_set(&hwc->prev_count, 0); + + smmu_pmu_interrupt_enable(smmu_pmu, idx); + + if (flags & PERF_EF_START) + smmu_pmu_event_start(event, flags); + + /* Propagate changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void smmu_pmu_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + int idx = hwc->idx; + + smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE); + smmu_pmu_interrupt_disable(smmu_pmu, idx); + smmu_pmu->events[idx] = NULL; + clear_bit(idx, smmu_pmu->used_counters); + + perf_event_update_userpage(event); +} + +static void smmu_pmu_event_read(struct perf_event *event) +{ + smmu_pmu_event_update(event); +} + +/* cpumask */ + +static ssize_t smmu_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu)); +} + +static struct device_attribute smmu_pmu_cpumask_attr = + __ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL); + +static struct attribute *smmu_pmu_cpumask_attrs[] = { + &smmu_pmu_cpumask_attr.attr, + NULL +}; + +static struct attribute_group smmu_pmu_cpumask_group = { + .attrs = smmu_pmu_cpumask_attrs, +}; + +/* Events */ + +static ssize_t smmu_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define SMMU_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR(name, smmu_event_attr_##name, \ + config, smmu_pmu_event_show) +SMMU_EVENT_ATTR(cycles, 0); +SMMU_EVENT_ATTR(transaction, 1); +SMMU_EVENT_ATTR(tlb_miss, 2); +SMMU_EVENT_ATTR(config_cache_miss, 3); +SMMU_EVENT_ATTR(trans_table_walk_access, 4); +SMMU_EVENT_ATTR(config_struct_access, 5); +SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6); +SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7); + +static struct attribute *smmu_pmu_events[] = { + &smmu_event_attr_cycles.attr.attr, + &smmu_event_attr_transaction.attr.attr, + &smmu_event_attr_tlb_miss.attr.attr, + &smmu_event_attr_config_cache_miss.attr.attr, + &smmu_event_attr_trans_table_walk_access.attr.attr, + 
&smmu_event_attr_config_struct_access.attr.attr, + &smmu_event_attr_pcie_ats_trans_rq.attr.attr, + &smmu_event_attr_pcie_ats_trans_passed.attr.attr, + NULL +}; + +static umode_t smmu_pmu_event_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev)); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (test_bit(pmu_attr->id, smmu_pmu->supported_events)) + return attr->mode; + + return 0; +} + +static struct attribute_group smmu_pmu_events_group = { + .name = "events", + .attrs = smmu_pmu_events, + .is_visible = smmu_pmu_event_is_visible, +}; + +/* Formats */ +PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(filter_stream_id, "config1:0-31"); +PMU_FORMAT_ATTR(filter_span, "config1:32"); +PMU_FORMAT_ATTR(filter_enable, "config1:33"); + +static struct attribute *smmu_pmu_formats[] = { + &format_attr_event.attr, + &format_attr_filter_stream_id.attr, + &format_attr_filter_span.attr, + &format_attr_filter_enable.attr, + NULL +}; + +static struct attribute_group smmu_pmu_format_group = { + .name = "format", + .attrs = smmu_pmu_formats, +}; + +static const struct attribute_group *smmu_pmu_attr_grps[] = { + &smmu_pmu_cpumask_group, + &smmu_pmu_events_group, + &smmu_pmu_format_group, + NULL +}; + +/* + * Generic device handlers + */ + +static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct smmu_pmu *smmu_pmu; + unsigned int target; + + smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node); + if (cpu != smmu_pmu->on_cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target); + smmu_pmu->on_cpu = target; + WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target))); + + return 0; +} + +static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) +{ + struct smmu_pmu *smmu_pmu = data; + u64 ovsr; + unsigned int idx; + + ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0); + if (!ovsr) + return IRQ_NONE; + + writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); + + for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) { + struct perf_event *event = smmu_pmu->events[idx]; + struct hw_perf_event *hwc; + + if (WARN_ON_ONCE(!event)) + continue; + + smmu_pmu_event_update(event); + hwc = &event->hw; + + smmu_pmu_set_period(smmu_pmu, hwc); + } + + return IRQ_HANDLED; +} + +static int smmu_pmu_setup_irq(struct smmu_pmu *pmu) +{ + unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD; + int irq, ret = -ENXIO; + + irq = pmu->irq; + if (irq) + ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq, + flags, "smmuv3-pmu", pmu); + return ret; +} + +static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu) +{ + u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0); + + smmu_pmu_disable(&smmu_pmu->pmu); + + /* Disable counter and interrupt */ + writeq_relaxed(counter_present_mask, + smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0); + writeq_relaxed(counter_present_mask, + smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0); + writeq_relaxed(counter_present_mask, + smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); +} + +static int smmu_pmu_probe(struct platform_device *pdev) +{ + struct smmu_pmu *smmu_pmu; + struct resource *res_0, *res_1; + u32 cfgr, reg_size; + u64 ceid_64[2]; + int irq, err; + char *name; + struct device *dev = &pdev->dev; + 
+ smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL); + if (!smmu_pmu) + return -ENOMEM; + + smmu_pmu->dev = dev; + platform_set_drvdata(pdev, smmu_pmu); + + smmu_pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .pmu_enable = smmu_pmu_enable, + .pmu_disable = smmu_pmu_disable, + .event_init = smmu_pmu_event_init, + .add = smmu_pmu_event_add, + .del = smmu_pmu_event_del, + .start = smmu_pmu_event_start, + .stop = smmu_pmu_event_stop, + .read = smmu_pmu_event_read, + .attr_groups = smmu_pmu_attr_grps, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + res_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0); + smmu_pmu->reg_base = devm_ioremap_resource(dev, res_0); + if (IS_ERR(smmu_pmu->reg_base)) + return PTR_ERR(smmu_pmu->reg_base); + + cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR); + + /* Determine if page 1 is present */ + if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) { + res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); + smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1); + if (IS_ERR(smmu_pmu->reloc_base)) + return PTR_ERR(smmu_pmu->reloc_base); + } else { + smmu_pmu->reloc_base = smmu_pmu->reg_base; + } + + irq = platform_get_irq(pdev, 0); + if (irq > 0) + smmu_pmu->irq = irq; + + ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0); + ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1); + bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64, + SMMU_PMCG_ARCH_MAX_EVENTS); + + smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1; + + smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE); + + reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr); + smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0); + + smmu_pmu_reset(smmu_pmu); + + err = smmu_pmu_setup_irq(smmu_pmu); + if (err) { + dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start); + return err; + } + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx", + (res_0->start) >> SMMU_PMCG_PA_SHIFT); + if (!name) { + dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start); + return -EINVAL; + } + + /* Pick one CPU to be the preferred one to use */ + smmu_pmu->on_cpu = raw_smp_processor_id(); + WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, + cpumask_of(smmu_pmu->on_cpu))); + + err = cpuhp_state_add_instance_nocalls(cpuhp_state_num, + &smmu_pmu->node); + if (err) { + dev_err(dev, "Error %d registering hotplug, PMU @%pa\n", + err, &res_0->start); + goto out_cpuhp_err; + } + + err = perf_pmu_register(&smmu_pmu->pmu, name, -1); + if (err) { + dev_err(dev, "Error %d registering PMU @%pa\n", + err, &res_0->start); + goto out_unregister; + } + + dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n", + &res_0->start, smmu_pmu->num_counters, + smmu_pmu->global_filter ? 
"Global(Counter0)" : + "Individual"); + + return 0; + +out_unregister: + cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); +out_cpuhp_err: + put_cpu(); + return err; +} + +static int smmu_pmu_remove(struct platform_device *pdev) +{ + struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&smmu_pmu->pmu); + cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); + + return 0; +} + +static void smmu_pmu_shutdown(struct platform_device *pdev) +{ + struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev); + + smmu_pmu_disable(&smmu_pmu->pmu); +} + +static struct platform_driver smmu_pmu_driver = { + .driver = { + .name = "arm-smmu-v3-pmcg", + }, + .probe = smmu_pmu_probe, + .remove = smmu_pmu_remove, + .shutdown = smmu_pmu_shutdown, +}; + +static int __init arm_smmu_pmu_init(void) +{ + cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/pmcg:online", + NULL, + smmu_pmu_offline_cpu); + if (cpuhp_state_num < 0) + return cpuhp_state_num; + + return platform_driver_register(&smmu_pmu_driver); +} +module_init(arm_smmu_pmu_init); + +static void __exit arm_smmu_pmu_exit(void) +{ + platform_driver_unregister(&smmu_pmu_driver); + cpuhp_remove_multi_state(cpuhp_state_num); +} + +module_exit(arm_smmu_pmu_exit); + +MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension"); +MODULE_AUTHOR("Neil Leeder "); +MODULE_AUTHOR("Shameer Kolothum "); +MODULE_LICENSE("GPL v2"); -- GitLab From f202cdab3b48d8c2c1846c938ea69cb8aa897699 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 26 Mar 2019 15:17:52 +0000 Subject: [PATCH 0630/1861] perf/smmuv3: Add MSI irq support This adds support for MSI-based counter overflow interrupt. Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index a6d2e3ce94dfb..a4f4b488a2de8 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -67,6 +67,7 @@ #define SMMU_PMCG_OVSSET0 0xCC0 #define SMMU_PMCG_CFGR 0xE00 #define SMMU_PMCG_CFGR_SID_FILTER_TYPE BIT(23) +#define SMMU_PMCG_CFGR_MSI BIT(21) #define SMMU_PMCG_CFGR_RELOC_CTRS BIT(20) #define SMMU_PMCG_CFGR_SIZE GENMASK(13, 8) #define SMMU_PMCG_CFGR_NCTR GENMASK(5, 0) @@ -77,6 +78,12 @@ #define SMMU_PMCG_IRQ_CTRL 0xE50 #define SMMU_PMCG_IRQ_CTRL_IRQEN BIT(0) #define SMMU_PMCG_IRQ_CFG0 0xE58 +#define SMMU_PMCG_IRQ_CFG1 0xE60 +#define SMMU_PMCG_IRQ_CFG2 0xE64 + +/* MSI config fields */ +#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) +#define MSI_CFG2_MEMATTR_DEVICE_nGnRE 0x1 #define SMMU_PMCG_DEFAULT_FILTER_SPAN 1 #define SMMU_PMCG_DEFAULT_FILTER_SID GENMASK(31, 0) @@ -580,11 +587,62 @@ static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) return IRQ_HANDLED; } +static void smmu_pmu_free_msis(void *data) +{ + struct device *dev = data; + + platform_msi_domain_free_irqs(dev); +} + +static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + phys_addr_t doorbell; + struct device *dev = msi_desc_to_dev(desc); + struct smmu_pmu *pmu = dev_get_drvdata(dev); + + doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; + doorbell &= MSI_CFG0_ADDR_MASK; + + writeq_relaxed(doorbell, pmu->reg_base + SMMU_PMCG_IRQ_CFG0); + writel_relaxed(msg->data, pmu->reg_base + SMMU_PMCG_IRQ_CFG1); + writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, + pmu->reg_base + 
SMMU_PMCG_IRQ_CFG2); +} + +static void smmu_pmu_setup_msi(struct smmu_pmu *pmu) +{ + struct msi_desc *desc; + struct device *dev = pmu->dev; + int ret; + + /* Clear MSI address reg */ + writeq_relaxed(0, pmu->reg_base + SMMU_PMCG_IRQ_CFG0); + + /* MSI supported or not */ + if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI)) + return; + + ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + return; + } + + desc = first_msi_entry(dev); + if (desc) + pmu->irq = desc->irq; + + /* Add callback to free MSIs on teardown */ + devm_add_action(dev, smmu_pmu_free_msis, dev); +} + static int smmu_pmu_setup_irq(struct smmu_pmu *pmu) { unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD; int irq, ret = -ENXIO; + smmu_pmu_setup_msi(pmu); + irq = pmu->irq; if (irq) ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq, -- GitLab From 24062fe85860debfdae0eeaa495f27c9971ec163 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 26 Mar 2019 15:17:53 +0000 Subject: [PATCH 0631/1861] perf/smmuv3: Enable HiSilicon Erratum 162001800 quirk HiSilicon erratum 162001800 describes the limitation of SMMUv3 PMCG implementation on HiSilicon Hip08 platforms. On these platforms, the PMCG event counter registers (SMMU_PMCG_EVCNTRn) are read only and as a result it is not possible to set the initial counter period value on event monitor start. To work around this, the current value of the counter is read and used for delta calculations. OEM information from ACPI header is used to identify the affected hardware platforms. Signed-off-by: Shameer Kolothum Reviewed-by: Hanjun Guo Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi [will: update silicon-errata.txt and add reason string to acpi match] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + drivers/acpi/arm64/iort.c | 16 ++++++++- drivers/perf/arm_smmuv3_pmu.c | 48 ++++++++++++++++++++++---- include/linux/acpi_iort.h | 1 + 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index d1e2bb801e1bd..c00efb639e460 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -77,6 +77,7 @@ stable kernels. | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | | Hisilicon | Hip0{6,7} | #161010701 | N/A | | Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 | +| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | | | | | | | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. 
| Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e2c9b26bbee66..2d70b349bd6c9 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1366,9 +1366,23 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, ACPI_EDGE_SENSITIVE, &res[2]); } +static struct acpi_platform_list pmcg_plat_info[] __initdata = { + /* HiSilicon Hip08 Platform */ + {"HISI ", "HIP08 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08}, + { } +}; + static int __init arm_smmu_v3_pmcg_add_platdata(struct platform_device *pdev) { - u32 model = IORT_SMMU_V3_PMCG_GENERIC; + u32 model; + int idx; + + idx = acpi_match_platform_list(pmcg_plat_info); + if (idx >= 0) + model = pmcg_plat_info[idx].data; + else + model = IORT_SMMU_V3_PMCG_GENERIC; return platform_device_add_data(pdev, &model, sizeof(model)); } diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index a4f4b488a2de8..da71c741cb46c 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -35,6 +35,7 @@ */ #include +#include #include #include #include @@ -93,6 +94,8 @@ #define SMMU_PMCG_PA_SHIFT 12 +#define SMMU_PMCG_EVCNTR_RDONLY BIT(0) + static int cpuhp_state_num; struct smmu_pmu { @@ -108,6 +111,7 @@ struct smmu_pmu { void __iomem *reg_base; void __iomem *reloc_base; u64 counter_mask; + u32 options; bool global_filter; u32 global_filter_span; u32 global_filter_sid; @@ -222,15 +226,27 @@ static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu, u32 idx = hwc->idx; u64 new; - /* - * We limit the max period to half the max counter value of the counter - * size, so that even in the case of extreme interrupt latency the - * counter will (hopefully) not wrap past its initial value. - */ - new = smmu_pmu->counter_mask >> 1; + if (smmu_pmu->options & SMMU_PMCG_EVCNTR_RDONLY) { + /* + * On platforms that require this quirk, if the counter starts + * at < half_counter value and wraps, the current logic of + * handling the overflow may not work. It is expected that, + * those platforms will have full 64 counter bits implemented + * so that such a possibility is remote(eg: HiSilicon HIP08). + */ + new = smmu_pmu_counter_get_value(smmu_pmu, idx); + } else { + /* + * We limit the max period to half the max counter value + * of the counter size, so that even in the case of extreme + * interrupt latency the counter will (hopefully) not wrap + * past its initial value. 
+ */ + new = smmu_pmu->counter_mask >> 1; + smmu_pmu_counter_set_value(smmu_pmu, idx, new); + } local64_set(&hwc->prev_count, new); - smmu_pmu_counter_set_value(smmu_pmu, idx, new); } static void smmu_pmu_set_event_filter(struct perf_event *event, @@ -665,6 +681,22 @@ static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu) smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); } +static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) +{ + u32 model; + + model = *(u32 *)dev_get_platdata(smmu_pmu->dev); + + switch (model) { + case IORT_SMMU_V3_PMCG_HISI_HIP08: + /* HiSilicon Erratum 162001800 */ + smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY; + break; + } + + dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options); +} + static int smmu_pmu_probe(struct platform_device *pdev) { struct smmu_pmu *smmu_pmu; @@ -744,6 +776,8 @@ static int smmu_pmu_probe(struct platform_device *pdev) return -EINVAL; } + smmu_pmu_get_acpi_options(smmu_pmu); + /* Pick one CPU to be the preferred one to use */ smmu_pmu->on_cpu = raw_smp_processor_id(); WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 052ef7b9f985a..723e4dfa1c149 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -32,6 +32,7 @@ * do with hardware or with IORT specification. */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); -- GitLab From 50398fde997f6be8faebdb5f38e9c9c467370f51 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 2 Apr 2019 18:07:38 +0800 Subject: [PATCH 0632/1861] btrfs: prop: fix zstd compression parameter validation We let pass zstd compression parameter even if it is not fully valid. For example: $ btrfs prop set /btrfs compression zst $ btrfs prop get /btrfs compression compression=zst zlib and lzo are fine. Fix it by checking the correct prefix length. Fixes: 5c1aab1dd544 ("btrfs: Add zstd support") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Nikolay Borisov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index dc6140013ae81..fd19f30785667 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -396,7 +396,7 @@ static int prop_compression_apply(struct inode *inode, btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); } else if (!strncmp("zlib", value, 4)) { type = BTRFS_COMPRESS_ZLIB; - } else if (!strncmp("zstd", value, len)) { + } else if (!strncmp("zstd", value, 4)) { type = BTRFS_COMPRESS_ZSTD; btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); } else { -- GitLab From 272e5326c7837697882ce3162029ba893059b616 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 2 Apr 2019 18:07:40 +0800 Subject: [PATCH 0633/1861] btrfs: prop: fix vanished compression property after failed set The compression property resets to NULL, instead of the old value if we fail to set the new compression parameter. $ btrfs prop get /btrfs compression compression=lzo $ btrfs prop set /btrfs compression zli ERROR: failed to set compression for /btrfs: Invalid argument $ btrfs prop get /btrfs compression This is because the compression property ->validate() is successful for 'zli' as the strncmp() used the length passed from the userspace. Fix it by using the expected string length in strncmp(). 
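strncmp() compares at most its third argument's worth of bytes, so
bounding the comparison by the user-supplied length turns the check into
a mere prefix match against the user input (a minimal illustration, not
part of the patch):

    strncmp("zstd", "zst", 3);  /* len == 3 from "zst": 0, bogus match */
    strncmp("zstd", "zst", 4);  /* keyword length: non-zero, rejected */
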
Fixes: 63541927c8d1 ("Btrfs: add support for inode properties") Fixes: 5c1aab1dd544 ("btrfs: Add zstd support") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Nikolay Borisov Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/props.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index fd19f30785667..61d22a56c0ba4 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -366,11 +366,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, static int prop_compression_validate(const char *value, size_t len) { - if (!strncmp("lzo", value, len)) + if (!strncmp("lzo", value, 3)) return 0; - else if (!strncmp("zlib", value, len)) + else if (!strncmp("zlib", value, 4)) return 0; - else if (!strncmp("zstd", value, len)) + else if (!strncmp("zstd", value, 4)) return 0; return -EINVAL; -- GitLab From 3a39a12ad364a9acd1038ba8da67cd8430f30de4 Mon Sep 17 00:00:00 2001 From: Liubin Shu Date: Thu, 4 Apr 2019 16:46:42 +0800 Subject: [PATCH 0634/1861] net: hns: fix KASAN: use-after-free in hns_nic_net_xmit_hw() This patch is trying to fix the issue due to: [27237.844750] BUG: KASAN: use-after-free in hns_nic_net_xmit_hw+0x708/0xa18[hns_enet_drv] After hnae_queue_xmit() in hns_nic_net_xmit_hw(), can be interrupted by interruptions, and than call hns_nic_tx_poll_one() to handle the new packets, and free the skb. So, when turn back to hns_nic_net_xmit_hw(), calling skb->len will cause use-after-free. This patch update tx ring statistics in hns_nic_tx_poll_one() to fix the bug. Signed-off-by: Liubin Shu Signed-off-by: Zhen Lei Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 60e7d7ae3787c..e5a7c0761dbd2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -376,8 +376,6 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, wmb(); /* commit all data before submit */ assert(skb->queue_mapping < priv->ae_handle->q_num); hnae_queue_xmit(priv->ae_handle->qs[skb->queue_mapping], buf_num); - ring->stats.tx_pkts++; - ring->stats.tx_bytes += skb->len; return NETDEV_TX_OK; @@ -999,6 +997,9 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, /* issue prefetch for next Tx descriptor */ prefetch(&ring->desc_cb[ring->next_to_clean]); } + /* update tx ring statistics. */ + ring->stats.tx_pkts += pkts; + ring->stats.tx_bytes += bytes; NETIF_TX_UNLOCK(ring); -- GitLab From acb1ce15a61154aa501891d67ebf79bc9ea26818 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:43 +0800 Subject: [PATCH 0635/1861] net: hns: Use NAPI_POLL_WEIGHT for hns driver When the HNS driver loaded, always have an error print: "netif_napi_add() called with weight 256" This is because the kernel checks the NAPI polling weights requested by drivers and it prints an error message if a driver requests a weight bigger than 64. So use NAPI_POLL_WEIGHT to fix it. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. 
Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index e5a7c0761dbd2..4cd86ba1f050d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -29,9 +29,6 @@ #define SERVICE_TIMER_HZ (1 * HZ) -#define NIC_TX_CLEAN_MAX_NUM 256 -#define NIC_RX_CLEAN_MAX_NUM 64 - #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 #define HNS_BUFFER_SIZE_2048 2048 @@ -2153,7 +2150,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_tx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } for (i = h->q_num; i < h->q_num * 2; i++) { @@ -2166,7 +2163,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) hns_nic_rx_fini_pro_v2; netif_napi_add(priv->netdev, &rd->napi, - hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM); + hns_nic_common_poll, NAPI_POLL_WEIGHT); rd->ring->irq_init_flag = RCB_IRQ_NOT_INITED; } -- GitLab From c0b0984426814f3a9251873b689e67d34d8ccd84 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:44 +0800 Subject: [PATCH 0636/1861] net: hns: Fix probabilistic memory overwrite when HNS driver initialized When reboot the system again and again, may cause a memory overwrite. [ 15.638922] systemd[1]: Reached target Swap. [ 15.667561] tun: Universal TUN/TAP device driver, 1.6 [ 15.676756] Bridge firewalling registered [ 17.344135] Unable to handle kernel paging request at virtual address 0000000200000040 [ 17.352179] Mem abort info: [ 17.355007] ESR = 0x96000004 [ 17.358105] Exception class = DABT (current EL), IL = 32 bits [ 17.364112] SET = 0, FnV = 0 [ 17.367209] EA = 0, S1PTW = 0 [ 17.370393] Data abort info: [ 17.373315] ISV = 0, ISS = 0x00000004 [ 17.377206] CM = 0, WnR = 0 [ 17.380214] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 17.386926] [0000000200000040] pgd=0000000000000000 [ 17.391878] Internal error: Oops: 96000004 [#1] SMP [ 17.396824] CPU: 23 PID: 95 Comm: kworker/u130:0 Tainted: G E 4.19.25-1.2.78.aarch64 #1 [ 17.414175] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.54 08/16/2018 [ 17.425615] Workqueue: events_unbound async_run_entry_fn [ 17.435151] pstate: 00000005 (nzcv daif -PAN -UAO) [ 17.444139] pc : __mutex_lock.isra.1+0x74/0x540 [ 17.453002] lr : __mutex_lock.isra.1+0x3c/0x540 [ 17.461701] sp : ffff000100d9bb60 [ 17.469146] x29: ffff000100d9bb60 x28: 0000000000000000 [ 17.478547] x27: 0000000000000000 x26: ffff802fb8945000 [ 17.488063] x25: 0000000000000000 x24: ffff802fa32081a8 [ 17.497381] x23: 0000000000000002 x22: ffff801fa2b15220 [ 17.506701] x21: ffff000009809000 x20: ffff802fa23a0888 [ 17.515980] x19: ffff801fa2b15220 x18: 0000000000000000 [ 17.525272] x17: 0000000200000000 x16: 0000000200000000 [ 17.534511] x15: 0000000000000000 x14: 0000000000000000 [ 17.543652] x13: ffff000008d95db8 x12: 000000000000000d [ 17.552780] x11: ffff000008d95d90 x10: 0000000000000b00 [ 17.561819] x9 : ffff000100d9bb90 x8 : ffff802fb89d6560 [ 17.570829] x7 : 0000000000000004 x6 : 00000004a1801d05 [ 17.579839] x5 : 0000000000000000 x4 : 0000000000000000 [ 17.588852] x3 : ffff802fb89d5a00 x2 : 0000000000000000 [ 17.597734] x1 : 0000000200000000 x0 : 0000000200000000 [ 17.606631] Process kworker/u130:0 (pid: 95, stack limit = 0x(____ptrval____)) [ 17.617438] Call 
trace: [ 17.623349] __mutex_lock.isra.1+0x74/0x540 [ 17.630927] __mutex_lock_slowpath+0x24/0x30 [ 17.638602] mutex_lock+0x50/0x60 [ 17.645295] drain_workqueue+0x34/0x198 [ 17.652623] __sas_drain_work+0x7c/0x168 [ 17.659903] sas_drain_work+0x60/0x68 [ 17.666947] hisi_sas_scan_finished+0x30/0x40 [hisi_sas_main] [ 17.676129] do_scsi_scan_host+0x70/0xb0 [ 17.683534] do_scan_async+0x20/0x228 [ 17.690586] async_run_entry_fn+0x4c/0x1d0 [ 17.697997] process_one_work+0x1b4/0x3f8 [ 17.705296] worker_thread+0x54/0x470 Every time the call trace is not the same, but the overwrite address is always the same: Unable to handle kernel paging request at virtual address 0000000200000040 The root cause is, when write the reg XGMAC_MAC_TX_LF_RF_CONTROL_REG, didn't use the io_base offset. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index ba4316910dea1..a60f207768fc7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -129,7 +129,7 @@ static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0); dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1); dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0); - dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); + dsaf_write_dev(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); } /** -- GitLab From f058e46855dcbc28edb2ed4736f38a71fd19cadb Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:45 +0800 Subject: [PATCH 0637/1861] net: hns: fix ICMP6 neighbor solicitation messages discard problem ICMP6 neighbor solicitation messages will be discard by the Hip06 chips, because of not setting forwarding pool. Enable promisc mode has the same problem. This patch fix the wrong forwarding table configs for the multicast vague matching when enable promisc mode, and add forwarding pool for the forwarding table. Signed-off-by: Yonglong Liu Signed-off-by: David S. 
Miller --- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index ac55db065f167..f5ff07cb2b729 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2750,6 +2750,17 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +static int hns_dsaf_get_port_id(u8 port) +{ + if (port < DSAF_SERVICE_NW_NUM) + return port; + + if (port >= DSAF_BASE_INNER_PORT_NUM) + return port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; + + return -EINVAL; +} + static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) { struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80}; @@ -2815,23 +2826,33 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) memset(&temp_key, 0x0, sizeof(temp_key)); mask_entry.addr[0] = 0x01; hns_dsaf_set_mac_key(dsaf_dev, &mask_key, mask_entry.in_vlan_id, - port, mask_entry.addr); + 0xf, mask_entry.addr); tbl_tcam_mcast.tbl_mcast_item_vld = 1; tbl_tcam_mcast.tbl_mcast_old_en = 0; - if (port < DSAF_SERVICE_NW_NUM) { - mskid = port; - } else if (port >= DSAF_BASE_INNER_PORT_NUM) { - mskid = port - DSAF_BASE_INNER_PORT_NUM + DSAF_SERVICE_NW_NUM; - } else { + /* set MAC port to handle multicast */ + mskid = hns_dsaf_get_port_id(port); + if (mskid == -EINVAL) { dev_err(dsaf_dev->dev, "%s,pnum(%d)error,key(%#x:%#x)\n", dsaf_dev->ae_dev.name, port, mask_key.high.val, mask_key.low.val); return; } + dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], + mskid % 32, 1); + /* set pool bit map to handle multicast */ + mskid = hns_dsaf_get_port_id(port_num); + if (mskid == -EINVAL) { + dev_err(dsaf_dev->dev, + "%s, pool bit map pnum(%d)error,key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, port_num, + mask_key.high.val, mask_key.low.val); + return; + } dsaf_set_bit(tbl_tcam_mcast.tbl_mcast_port_msk[mskid / 32], mskid % 32, 1); + memcpy(&temp_key, &mask_key, sizeof(mask_key)); hns_dsaf_tcam_mc_cfg_vague(dsaf_dev, entry_index, &tbl_tcam_data_mc, (struct dsaf_tbl_tcam_data *)(&mask_key), -- GitLab From 8601a99d7c0256b7a7fdd1ab14cf6c1f1dfcadc6 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:46 +0800 Subject: [PATCH 0638/1861] net: hns: Fix WARNING when remove HNS driver with SMMU enabled When enable SMMU, remove HNS driver will cause a WARNING: [ 141.924177] WARNING: CPU: 36 PID: 2708 at drivers/iommu/dma-iommu.c:443 __iommu_dma_unmap+0xc0/0xc8 [ 141.954673] Modules linked in: hns_enet_drv(-) [ 141.963615] CPU: 36 PID: 2708 Comm: rmmod Tainted: G W 5.0.0-rc1-28723-gb729c57de95c-dirty #32 [ 141.983593] Hardware name: Huawei D05/D05, BIOS Hisilicon D05 UEFI Nemo 1.8 RC0 08/31/2017 [ 142.000244] pstate: 60000005 (nZCv daif -PAN -UAO) [ 142.009886] pc : __iommu_dma_unmap+0xc0/0xc8 [ 142.018476] lr : __iommu_dma_unmap+0xc0/0xc8 [ 142.027066] sp : ffff000013533b90 [ 142.033728] x29: ffff000013533b90 x28: ffff8013e6983600 [ 142.044420] x27: 0000000000000000 x26: 0000000000000000 [ 142.055113] x25: 0000000056000000 x24: 0000000000000015 [ 142.065806] x23: 0000000000000028 x22: ffff8013e66eee68 [ 142.076499] x21: ffff8013db919800 x20: 0000ffffefbff000 [ 142.087192] x19: 0000000000001000 x18: 0000000000000007 [ 142.097885] x17: 000000000000000e x16: 0000000000000001 [ 142.108578] x15: 0000000000000019 x14: 363139343a70616d [ 142.119270] x13: 6e75656761705f67 x12: 
0000000000000000 [ 142.129963] x11: 00000000ffffffff x10: 0000000000000006 [ 142.140656] x9 : 1346c1aa88093500 x8 : ffff0000114de4e0 [ 142.151349] x7 : 6662666578303d72 x6 : ffff0000105ffec8 [ 142.162042] x5 : 0000000000000000 x4 : 0000000000000000 [ 142.172734] x3 : 00000000ffffffff x2 : ffff0000114de500 [ 142.183427] x1 : 0000000000000000 x0 : 0000000000000035 [ 142.194120] Call trace: [ 142.199030] __iommu_dma_unmap+0xc0/0xc8 [ 142.206920] iommu_dma_unmap_page+0x20/0x28 [ 142.215335] __iommu_unmap_page+0x40/0x60 [ 142.223399] hnae_unmap_buffer+0x110/0x134 [ 142.231639] hnae_free_desc+0x6c/0x10c [ 142.239177] hnae_fini_ring+0x14/0x34 [ 142.246540] hnae_fini_queue+0x2c/0x40 [ 142.254080] hnae_put_handle+0x38/0xcc [ 142.261619] hns_nic_dev_remove+0x54/0xfc [hns_enet_drv] [ 142.272312] platform_drv_remove+0x24/0x64 [ 142.280552] device_release_driver_internal+0x17c/0x20c [ 142.291070] driver_detach+0x4c/0x90 [ 142.298259] bus_remove_driver+0x5c/0xd8 [ 142.306148] driver_unregister+0x2c/0x54 [ 142.314037] platform_driver_unregister+0x10/0x18 [ 142.323505] hns_nic_dev_driver_exit+0x14/0xf0c [hns_enet_drv] [ 142.335248] __arm64_sys_delete_module+0x214/0x25c [ 142.344891] el0_svc_common+0xb0/0x10c [ 142.352430] el0_svc_handler+0x24/0x80 [ 142.359968] el0_svc+0x8/0x7c0 [ 142.366104] ---[ end trace 60ad1cd58e63c407 ]--- The tx ring buffer map when xmit and unmap when xmit done. So in hnae_init_ring() did not map tx ring buffer, but in hnae_fini_ring() have a unmap operation for tx ring buffer, which is already unmapped when xmit done, than cause this WARNING. The hnae_alloc_buffers() is called in hnae_init_ring(), so the hnae_free_buffers() should be in hnae_fini_ring(), not in hnae_free_desc(). In hnae_fini_ring(), adds a check is_rx_ring() as in hnae_init_ring(). When the ring buffer is tx ring, adds a piece of code to ensure that the tx ring is unmap. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 79d03f8ee7b18..c7fa97a7e1f4d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -150,7 +150,6 @@ out_buffer_fail: /* free desc along with its attached buffer */ static void hnae_free_desc(struct hnae_ring *ring) { - hnae_free_buffers(ring); dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr, ring->desc_num * sizeof(ring->desc[0]), ring_to_dma_dir(ring)); @@ -183,6 +182,9 @@ static int hnae_alloc_desc(struct hnae_ring *ring) /* fini ring, also free the buffer for the ring */ static void hnae_fini_ring(struct hnae_ring *ring) { + if (is_rx_ring(ring)) + hnae_free_buffers(ring); + hnae_free_desc(ring); kfree(ring->desc_cb); ring->desc_cb = NULL; -- GitLab From 15400663aba5de11e99a9a2a35bfb2bae65e28e0 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Thu, 4 Apr 2019 16:46:47 +0800 Subject: [PATCH 0639/1861] net: hns: Fix sparse: some warnings in HNS drivers There are some sparse warnings in the HNS drivers: warning: incorrect type in assignment (different address spaces) expected void [noderef] *io_base got void *vaddr warning: cast removes address space '' of expression [...] Add __iomem and change all the u8 __iomem to void __iomem to fix these kind of warnings. 
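As a hedged, generic sketch of the annotation pattern these changes apply (illustrative only, not the HNS code): sparse tracks pointer address spaces, so an MMIO base pointer keeps its __iomem qualifier end to end and is only dereferenced through the read*/write* accessors, never cast through a plain pointer type that drops the annotation:

  #include <linux/io.h>
  #include <linux/types.h>

  struct demo_regs {
          u8 __iomem *base;               /* MMIO base, byte-addressable */
  };

  static u32 demo_read(struct demo_regs *r, u32 reg)
  {
          /* pointer arithmetic on u8 __iomem * gives byte offsets */
          return readl(r->base + reg);
  }

  static void demo_write(struct demo_regs *r, u32 reg, u32 val)
  {
          writel(val, r->base + reg);
  }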
warning: incorrect type in argument 1 (different address spaces) expected void [noderef] *base got unsigned char [usertype] *base_addr warning: cast to restricted __le16 warning: incorrect type in assignment (different base types) expected unsigned int [usertype] tbl_tcam_data_high got restricted __le32 [usertype] warning: cast to restricted __le32 [...] These variables used u32/u16 as their type, and finally as a parameter of writel(), writel() will do the cpu_to_le32 coversion so remove the little endian covert code to fix these kind of warnings. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 2 +- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 4 ++-- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 20 ++++++------------- .../ethernet/hisilicon/hns/hns_dsaf_main.h | 2 ++ .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 2 +- .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 6 +++--- .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.h | 4 ++-- .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 4 ++-- .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 12 +++++------ drivers/net/ethernet/hisilicon/hns_mdio.c | 18 +++++++---------- 11 files changed, 33 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 08a750fb60c49..d6fb834372304 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -357,7 +357,7 @@ struct hnae_buf_ops { }; struct hnae_queue { - void __iomem *io_base; + u8 __iomem *io_base; phys_addr_t phy_base; struct hnae_ae_dev *dev; /* the device who use this queue */ struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index a97228c93831d..6c0507921623b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -370,7 +370,7 @@ int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn) static void hns_mac_param_get(struct mac_params *param, struct hns_mac_cb *mac_cb) { - param->vaddr = (void *)mac_cb->vaddr; + param->vaddr = mac_cb->vaddr; param->mac_mode = hns_get_enet_interface(mac_cb); ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr); param->mac_id = mac_cb->mac_id; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index fbc75341bef76..22589799f1a57 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -187,7 +187,7 @@ struct mac_statistics { /*mac para struct ,mac get param from nic or dsaf when initialize*/ struct mac_params { char addr[ETH_ALEN]; - void *vaddr; /*virtual address*/ + u8 __iomem *vaddr; /*virtual address*/ struct device *dev; u8 mac_id; /**< Ethernet operation mode (MAC-PHY interface and speed) */ @@ -402,7 +402,7 @@ struct mac_driver { enum mac_mode mac_mode; u8 mac_id; struct hns_mac_cb *mac_cb; - void __iomem *io_base; + u8 __iomem *io_base; unsigned int mac_en_flg;/*you'd better don't enable mac twice*/ unsigned int virt_dev_num; struct device *dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index f5ff07cb2b729..61eea6ac846fc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1602,8 +1602,6 @@ static void hns_dsaf_set_mac_key( DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, DSAF_TBL_TCAM_KEY_PORT_S, port); - - mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan); } /** @@ -1663,8 +1661,8 @@ int hns_dsaf_set_mac_uc_entry( /* default config dvc to 0 */ mac_data.tbl_ucast_dvc = 0; mac_data.tbl_ucast_out_port = mac_entry->port_num; - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data); @@ -1786,9 +1784,6 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, 0xff, mc_mask); - mask_key.high.val = le32_to_cpu(mask_key.high.val); - mask_key.low.val = le32_to_cpu(mask_key.low.val); - pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); } @@ -1840,8 +1835,8 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; /* config mc entry with mask */ hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, @@ -1956,9 +1951,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, /* config key mask */ hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_mask); - mask_key.high.val = le32_to_cpu(mask_key.high.val); - mask_key.low.val = le32_to_cpu(mask_key.low.val); - pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); } @@ -2012,8 +2004,8 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, soft_mac_entry += entry_index; soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; } else { /* not zero, just del port, update */ - tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); - tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + tcam_data.tbl_tcam_data_high = mac_key.high.val; + tcam_data.tbl_tcam_data_low = mac_key.low.val; hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 0e1cd99831a60..76cc8887e1a83 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -467,4 +467,6 @@ int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, u8 port_num); int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port); +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); + #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 16294cd3c9545..19b94879691f8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -670,7 +670,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en) dsaf_set_field(origin, 1ull << 10, 10, en); dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin); } else { - u8 *base_addr = (u8 *)mac_cb->serdes_vaddr + + u8 __iomem *base_addr = mac_cb->serdes_vaddr + (mac_cb->mac_id <= 3 ? 
0x00280000 : 0x00200000); dsaf_set_reg_field(base_addr, reg_offset, 1ull << 10, 10, en); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index 3d07c8a7639da..17c019106e6e4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -61,7 +61,7 @@ void hns_ppe_set_indir_table(struct hns_ppe_cb *ppe_cb, } } -static void __iomem * +static u8 __iomem * hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common) { return ppe_common->dsaf_dev->ppe_base + PPE_COMMON_REG_OFFSET; @@ -111,8 +111,8 @@ hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index) dsaf_dev->ppe_common[comm_index] = NULL; } -static void __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, - int ppe_idx) +static u8 __iomem *hns_ppe_get_iobase(struct ppe_common_cb *ppe_common, + int ppe_idx) { return ppe_common->dsaf_dev->ppe_base + ppe_idx * PPE_REG_OFFSET; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index f670e63a5a018..110c6e8222c70 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -80,7 +80,7 @@ struct hns_ppe_cb { struct hns_ppe_hw_stats hw_stats; u8 index; /* index in a ppe common device */ - void __iomem *io_base; + u8 __iomem *io_base; int virq; u32 rss_indir_table[HNS_PPEV2_RSS_IND_TBL_SIZE]; /*shadow indir tab */ u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]; /* rss hash key */ @@ -89,7 +89,7 @@ struct hns_ppe_cb { struct ppe_common_cb { struct device *dev; struct dsaf_device *dsaf_dev; - void __iomem *io_base; + u8 __iomem *io_base; enum ppe_common_mode ppe_mode; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 6bf346c11b25a..ac3518ca4d7be 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -458,7 +458,7 @@ static void hns_rcb_ring_get_cfg(struct hnae_queue *q, int ring_type) mdnum_ppkt = HNS_RCB_RING_MAX_BD_PER_PKT; } else { ring = &q->tx_ring; - ring->io_base = (u8 __iomem *)ring_pair_cb->q.io_base + + ring->io_base = ring_pair_cb->q.io_base + HNS_RCB_TX_REG_OFFSET; irq_idx = HNS_RCB_IRQ_IDX_TX; mdnum_ppkt = is_ver1 ? 
HNS_RCB_RING_MAX_TXBD_PER_PKT : @@ -764,7 +764,7 @@ static int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev) } } -static void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) +static u8 __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common) { struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index b9733b0b84826..b9e7f11f08968 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -1018,7 +1018,7 @@ #define XGMAC_PAUSE_CTL_RSP_MODE_B 2 #define XGMAC_PAUSE_CTL_TX_XOFF_B 3 -static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value) +static inline void dsaf_write_reg(u8 __iomem *base, u32 reg, u32 value) { writel(value, base + reg); } @@ -1053,7 +1053,7 @@ static inline int dsaf_read_syscon(struct regmap *base, u32 reg, u32 *val) #define dsaf_set_bit(origin, shift, val) \ dsaf_set_field((origin), (1ull << (shift)), (shift), (val)) -static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask, +static inline void dsaf_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift, u32 val) { u32 origin = dsaf_read_reg(base, reg); @@ -1073,7 +1073,7 @@ static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask, #define dsaf_get_bit(origin, shift) \ dsaf_get_field((origin), (1ull << (shift)), (shift)) -static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask, +static inline u32 dsaf_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift) { u32 origin; @@ -1089,11 +1089,11 @@ static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask, dsaf_get_reg_field((dev)->io_base, (reg), (1ull << (bit)), (bit)) #define dsaf_write_b(addr, data)\ - writeb((data), (__iomem unsigned char *)(addr)) + writeb((data), (__iomem u8 *)(addr)) #define dsaf_read_b(addr)\ - readb((__iomem unsigned char *)(addr)) + readb((__iomem u8 *)(addr)) #define hns_mac_reg_read64(drv, offset) \ - readq((__iomem void *)(((u8 *)(drv)->io_base + 0xc00 + (offset)))) + readq((__iomem void *)(((drv)->io_base + 0xc00 + (offset)))) #endif /* _DSAF_REG_H */ diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index baf5cc251f329..8b8a7d00e8e0c 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -39,7 +39,7 @@ struct hns_mdio_sc_reg { }; struct hns_mdio_device { - void *vbase; /* mdio reg base address */ + u8 __iomem *vbase; /* mdio reg base address */ struct regmap *subctrl_vbase; struct hns_mdio_sc_reg sc_reg; }; @@ -96,21 +96,17 @@ enum mdio_c45_op_seq { #define MDIO_SC_CLK_ST 0x531C #define MDIO_SC_RESET_ST 0x5A1C -static void mdio_write_reg(void *base, u32 reg, u32 value) +static void mdio_write_reg(u8 __iomem *base, u32 reg, u32 value) { - u8 __iomem *reg_addr = (u8 __iomem *)base; - - writel_relaxed(value, reg_addr + reg); + writel_relaxed(value, base + reg); } #define MDIO_WRITE_REG(a, reg, value) \ mdio_write_reg((a)->vbase, (reg), (value)) -static u32 mdio_read_reg(void *base, u32 reg) +static u32 mdio_read_reg(u8 __iomem *base, u32 reg) { - u8 __iomem *reg_addr = (u8 __iomem *)base; - - return readl_relaxed(reg_addr + reg); + return readl_relaxed(base + reg); } #define mdio_set_field(origin, mask, shift, val) \ @@ -121,7 +117,7 @@ static u32 mdio_read_reg(void *base, u32 reg) #define mdio_get_field(origin, mask, shift) (((origin) >> 
(shift)) & (mask)) -static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift, +static void mdio_set_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift, u32 val) { u32 origin = mdio_read_reg(base, reg); @@ -133,7 +129,7 @@ static void mdio_set_reg_field(void *base, u32 reg, u32 mask, u32 shift, #define MDIO_SET_REG_FIELD(dev, reg, mask, shift, val) \ mdio_set_reg_field((dev)->vbase, (reg), (mask), (shift), (val)) -static u32 mdio_get_reg_field(void *base, u32 reg, u32 mask, u32 shift) +static u32 mdio_get_reg_field(u8 __iomem *base, u32 reg, u32 mask, u32 shift) { u32 origin; -- GitLab From 2ec1ed2aa68782b342458681aa4d16b65c9014d6 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 4 Apr 2019 12:16:27 +0200 Subject: [PATCH 0640/1861] net: thunderx: fix NULL pointer dereference in nicvf_open/nicvf_stop When a bpf program is uploaded, the driver computes the number of xdp tx queues resulting in the allocation of additional qsets. Starting from commit '2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them")' the driver runs link state polling for each VF resulting in the following NULL pointer dereference: [ 56.169256] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 [ 56.178032] Mem abort info: [ 56.180834] ESR = 0x96000005 [ 56.183877] Exception class = DABT (current EL), IL = 32 bits [ 56.189792] SET = 0, FnV = 0 [ 56.192834] EA = 0, S1PTW = 0 [ 56.195963] Data abort info: [ 56.198831] ISV = 0, ISS = 0x00000005 [ 56.202662] CM = 0, WnR = 0 [ 56.205619] user pgtable: 64k pages, 48-bit VAs, pgdp = 0000000021f0c7a0 [ 56.212315] [0000000000000020] pgd=0000000000000000, pud=0000000000000000 [ 56.219094] Internal error: Oops: 96000005 [#1] SMP [ 56.260459] CPU: 39 PID: 2034 Comm: ip Not tainted 5.1.0-rc3+ #3 [ 56.266452] Hardware name: GIGABYTE R120-T33/MT30-GS1, BIOS T49 02/02/2018 [ 56.273315] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 56.278098] pc : __ll_sc___cmpxchg_case_acq_64+0x4/0x20 [ 56.283312] lr : mutex_lock+0x2c/0x50 [ 56.286962] sp : ffff0000219af1b0 [ 56.290264] x29: ffff0000219af1b0 x28: ffff800f64de49a0 [ 56.295565] x27: 0000000000000000 x26: 0000000000000015 [ 56.300865] x25: 0000000000000000 x24: 0000000000000000 [ 56.306165] x23: 0000000000000000 x22: ffff000011117000 [ 56.311465] x21: ffff800f64dfc080 x20: 0000000000000020 [ 56.316766] x19: 0000000000000020 x18: 0000000000000001 [ 56.322066] x17: 0000000000000000 x16: ffff800f2e077080 [ 56.327367] x15: 0000000000000004 x14: 0000000000000000 [ 56.332667] x13: ffff000010964438 x12: 0000000000000002 [ 56.337967] x11: 0000000000000000 x10: 0000000000000c70 [ 56.343268] x9 : ffff0000219af120 x8 : ffff800f2e077d50 [ 56.348568] x7 : 0000000000000027 x6 : 000000062a9d6a84 [ 56.353869] x5 : 0000000000000000 x4 : ffff800f2e077480 [ 56.359169] x3 : 0000000000000008 x2 : ffff800f2e077080 [ 56.364469] x1 : 0000000000000000 x0 : 0000000000000020 [ 56.369770] Process ip (pid: 2034, stack limit = 0x00000000c862da3a) [ 56.376110] Call trace: [ 56.378546] __ll_sc___cmpxchg_case_acq_64+0x4/0x20 [ 56.383414] drain_workqueue+0x34/0x198 [ 56.387247] nicvf_open+0x48/0x9e8 [nicvf] [ 56.391334] nicvf_open+0x898/0x9e8 [nicvf] [ 56.395507] nicvf_xdp+0x1bc/0x238 [nicvf] [ 56.399595] dev_xdp_install+0x68/0x90 [ 56.403333] dev_change_xdp_fd+0xc8/0x240 [ 56.407333] do_setlink+0x8e0/0xbe8 [ 56.410810] __rtnl_newlink+0x5b8/0x6d8 [ 56.414634] rtnl_newlink+0x54/0x80 [ 56.418112] rtnetlink_rcv_msg+0x22c/0x2f8 [ 56.422199] 
netlink_rcv_skb+0x60/0x120 [ 56.426023] rtnetlink_rcv+0x28/0x38 [ 56.429587] netlink_unicast+0x1c8/0x258 [ 56.433498] netlink_sendmsg+0x1b4/0x350 [ 56.437410] sock_sendmsg+0x4c/0x68 [ 56.440887] ___sys_sendmsg+0x240/0x280 [ 56.444711] __sys_sendmsg+0x68/0xb0 [ 56.448275] __arm64_sys_sendmsg+0x2c/0x38 [ 56.452361] el0_svc_handler+0x9c/0x128 [ 56.456186] el0_svc+0x8/0xc [ 56.459056] Code: 35ffff91 2a1003e0 d65f03c0 f9800011 (c85ffc10) [ 56.465166] ---[ end trace 4a57fdc27b0a572c ]--- [ 56.469772] Kernel panic - not syncing: Fatal exception Fix it by checking nicvf_rx_mode_wq pointer in nicvf_open and nicvf_stop Fixes: 2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them") Fixes: 2c632ad8bc74 ("net: thunderx: move link state polling function to VF") Reported-by: Matteo Croce Signed-off-by: Lorenzo Bianconi Tested-by: Matteo Croce Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/nicvf_main.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index aa2be48071913..28eac90562113 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1328,10 +1328,11 @@ int nicvf_stop(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; - cancel_delayed_work_sync(&nic->link_change_work); - /* wait till all queued set_rx_mode tasks completes */ - drain_workqueue(nic->nicvf_rx_mode_wq); + if (nic->nicvf_rx_mode_wq) { + cancel_delayed_work_sync(&nic->link_change_work); + drain_workqueue(nic->nicvf_rx_mode_wq); + } mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; nicvf_send_msg_to_pf(nic, &mbx); @@ -1452,7 +1453,8 @@ int nicvf_open(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; /* wait till all queued set_rx_mode tasks completes if any */ - drain_workqueue(nic->nicvf_rx_mode_wq); + if (nic->nicvf_rx_mode_wq) + drain_workqueue(nic->nicvf_rx_mode_wq); netif_carrier_off(netdev); @@ -1550,10 +1552,12 @@ int nicvf_open(struct net_device *netdev) /* Send VF config done msg to PF */ nicvf_send_cfg_done(nic); - INIT_DELAYED_WORK(&nic->link_change_work, - nicvf_link_status_check_task); - queue_delayed_work(nic->nicvf_rx_mode_wq, - &nic->link_change_work, 0); + if (nic->nicvf_rx_mode_wq) { + INIT_DELAYED_WORK(&nic->link_change_work, + nicvf_link_status_check_task); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 0); + } return 0; cleanup: -- GitLab From fae2708174ae95d98d19f194e03d6e8f688ae195 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 4 Apr 2019 12:31:35 +0200 Subject: [PATCH 0641/1861] net/sched: act_sample: fix divide by zero in the traffic path the control path of 'sample' action does not validate the value of 'rate' provided by the user, but then it uses it as divisor in the traffic path. 
Validate it in tcf_sample_init(), and return -EINVAL with a proper extack message in case that value is zero, to fix a splat with the script below: # tc f a dev test0 egress matchall action sample rate 0 group 1 index 2 # tc -s a s action sample total acts 1 action order 0: sample rate 1/0 group 1 pipe index 2 ref 1 bind 1 installed 19 sec used 19 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 # ping 192.0.2.1 -I test0 -c1 -q divide error: 0000 [#1] SMP PTI CPU: 1 PID: 6192 Comm: ping Not tainted 5.1.0-rc2.diag2+ #591 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_sample_act+0x9e/0x1e0 [act_sample] Code: 6a f1 85 c0 74 0d 80 3d 83 1a 00 00 00 0f 84 9c 00 00 00 4d 85 e4 0f 84 85 00 00 00 e8 9b d7 9c f1 44 8b 8b e0 00 00 00 31 d2 <41> f7 f1 85 d2 75 70 f6 85 83 00 00 00 10 48 8b 45 10 8b 88 08 01 RSP: 0018:ffffae320190ba30 EFLAGS: 00010246 RAX: 00000000b0677d21 RBX: ffff8af1ed9ec000 RCX: 0000000059a9fe49 RDX: 0000000000000000 RSI: 000000000c7e33b7 RDI: ffff8af23daa0af0 RBP: ffff8af1ee11b200 R08: 0000000074fcaf7e R09: 0000000000000000 R10: 0000000000000050 R11: ffffffffb3088680 R12: ffff8af232307f80 R13: 0000000000000003 R14: ffff8af1ed9ec000 R15: 0000000000000000 FS: 00007fe9c6d2f740(0000) GS:ffff8af23da80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff6772f000 CR3: 00000000746a2004 CR4: 00000000001606e0 Call Trace: tcf_action_exec+0x7c/0x1c0 tcf_classify+0x57/0x160 __dev_queue_xmit+0x3dc/0xd10 ip_finish_output2+0x257/0x6d0 ip_output+0x75/0x280 ip_send_skb+0x15/0x40 raw_sendmsg+0xae3/0x1410 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe [...] Kernel panic - not syncing: Fatal exception in interrupt Add a TDC selftest to document that 'rate' is now being validated. Reported-by: Matteo Croce Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Signed-off-by: Davide Caratti Acked-by: Yotam Gigi Signed-off-by: David S. 
Miller --- net/sched/act_sample.c | 10 ++++++-- .../tc-testing/tc-tests/actions/sample.json | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 4060b0955c97d..0f82d50ea2324 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -45,8 +45,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; struct tcf_chain *goto_ch = NULL; + u32 psample_group_num, rate; struct tc_sample *parm; - u32 psample_group_num; struct tcf_sample *s; bool exists = false; int ret, err; @@ -85,6 +85,12 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (err < 0) goto release_idr; + rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + if (!rate) { + NL_SET_ERR_MSG(extack, "invalid sample rate"); + err = -EINVAL; + goto put_chain; + } psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { @@ -96,7 +102,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, spin_lock_bh(&s->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); - s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + s->rate = rate; s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json index 27f0acaed880e..ddabb160a11ba 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -143,6 +143,30 @@ "$TC actions flush action sample" ] }, + { + "id": "7571", + "name": "Add sample action with invalid rate", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions get action sample index 2", + "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action sample" + ] + }, { "id": "b6d4", "name": "Add sample action with mandatory arguments and invalid control action", -- GitLab From aecfde23108b8e637d9f5c5e523b24fb97035dc3 Mon Sep 17 00:00:00 2001 From: Koen De Schepper Date: Thu, 4 Apr 2019 12:24:02 +0000 Subject: [PATCH 0642/1861] tcp: Ensure DCTCP reacts to losses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC8257 §3.5 explicitly states that "A DCTCP sender MUST react to loss episodes in the same way as conventional TCP". Currently, Linux DCTCP performs no cwnd reduction when losses are encountered. Optionally, the dctcp_clamp_alpha_on_loss resets alpha to its maximal value if a RTO happens. This behavior is sub-optimal for at least two reasons: i) it ignores losses triggering fast retransmissions; and ii) it causes unnecessary large cwnd reduction in the future if the loss was isolated as it resets the historical term of DCTCP's alpha EWMA to its maximal value (i.e., denoting a total congestion). The second reason has an especially noticeable effect when using DCTCP in high BDP environments, where alpha normally stays at low values. This patch replace the clamping of alpha by setting ssthresh to half of cwnd for both fast retransmissions and RTOs, at most once per RTT. 
Consequently, the dctcp_clamp_alpha_on_loss module parameter has been removed. The table below shows experimental results where we measured the drop probability of a PIE AQM (not applying ECN marks) at a bottleneck in the presence of a single TCP flow with either the alpha-clamping option enabled or the cwnd halving proposed by this patch. Results using reno or cubic are given for comparison. | Link | RTT | Drop TCP CC | speed | base+AQM | probability ==================|=========|==========|============ CUBIC | 40Mbps | 7+20ms | 0.21% RENO | | | 0.19% DCTCP-CLAMP-ALPHA | | | 25.80% DCTCP-HALVE-CWND | | | 0.22% ------------------|---------|----------|------------ CUBIC | 100Mbps | 7+20ms | 0.03% RENO | | | 0.02% DCTCP-CLAMP-ALPHA | | | 23.30% DCTCP-HALVE-CWND | | | 0.04% ------------------|---------|----------|------------ CUBIC | 800Mbps | 1+1ms | 0.04% RENO | | | 0.05% DCTCP-CLAMP-ALPHA | | | 18.70% DCTCP-HALVE-CWND | | | 0.06% We see that, without halving its cwnd for all source of losses, DCTCP drives the AQM to large drop probabilities in order to keep the queue length under control (i.e., it repeatedly faces RTOs). Instead, if DCTCP reacts to all source of losses, it can then be controlled by the AQM using similar drop levels than cubic or reno. Signed-off-by: Koen De Schepper Signed-off-by: Olivier Tilmans Cc: Bob Briscoe Cc: Lawrence Brakmo Cc: Florian Westphal Cc: Daniel Borkmann Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Andrew Shewmaker Cc: Glenn Judd Acked-by: Florian Westphal Acked-by: Neal Cardwell Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index cd4814f7e9622..359da68d7c062 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -67,11 +67,6 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); -static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; -module_param(dctcp_clamp_alpha_on_loss, uint, 0644); -MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, - "parameter for clamping alpha on loss"); - static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) @@ -164,21 +159,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) } } -static void dctcp_state(struct sock *sk, u8 new_state) +static void dctcp_react_to_loss(struct sock *sk) { - if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { - struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - /* If this extension is enabled, we clamp dctcp_alpha to - * max on packet loss; the motivation is that dctcp_alpha - * is an indicator to the extend of congestion and packet - * loss is an indicator of extreme congestion; setting - * this in practice turned out to be beneficial, and - * effectively assumes total congestion which reduces the - * window by half. - */ - ca->dctcp_alpha = DCTCP_MAX_ALPHA; - } + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +static void dctcp_state(struct sock *sk, u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != inet_csk(sk)->icsk_ca_state) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. 
+ */ } static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) @@ -190,6 +187,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; default: /* Don't care for the rest. */ break; -- GitLab From b2100cc56fca8c51d28aa42a9f1fbcb2cf351996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 4 Apr 2019 15:01:33 +0200 Subject: [PATCH 0643/1861] sch_cake: Use tc_skb_protocol() helper for getting packet protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't be using skb->protocol directly as that will miss cases with hardware-accelerated VLAN tags. Use the helper instead to get the right protocol number. Reported-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index acc9b9da985f8..a3b55e18df047 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1519,7 +1519,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { u8 dscp; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) -- GitLab From c87b4ecdbe8db27867a7b7f840291cd843406bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 4 Apr 2019 15:01:33 +0200 Subject: [PATCH 0644/1861] sch_cake: Make sure we can write the IP header before changing DSCP bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is not actually any guarantee that the IP headers are valid before we access the DSCP bits of the packets. Fix this using the same approach taken in sch_dsmark. Reported-by: Kevin Darbyshire-Bryant Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a3b55e18df047..259d97bc2abd3 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1517,16 +1517,27 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) { + int wlen = skb_network_offset(skb); u8 dscp; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; if (wash && dscp) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); return dscp; case htons(ETH_P_IPV6): + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; if (wash && dscp) ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); -- GitLab From 6e3572e83dc3563e3b7e742bcb225b42a60cdaeb Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 4 Apr 2019 10:25:28 +0200 Subject: [PATCH 0645/1861] MIPS: generic: Add switchdev, pinctrl and fit to ocelot_defconfig Some of the configuration were not selected by default anymore, therefore enable them again. Also remove some configs which are used for MSCC Ocelot. 
Signed-off-by: Horatiu Vultur Signed-off-by: Paul Burton Cc: Cc: Cc: Cc: Cc: Cc: --- arch/mips/configs/generic/board-ocelot.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index f607888d24838..184eb65a6ba71 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -1,6 +1,10 @@ # require CONFIG_CPU_MIPS32_R2=y CONFIG_LEGACY_BOARD_OCELOT=y +CONFIG_FIT_IMAGE_FDT_OCELOT=y + +CONFIG_BRIDGE=y +CONFIG_GENERIC_PHY=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -19,6 +23,8 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_NETDEVICES=y +CONFIG_NET_SWITCHDEV=y +CONFIG_NET_DSA=y CONFIG_MSCC_OCELOT_SWITCH=y CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y CONFIG_MDIO_MSCC_MIIM=y @@ -35,6 +41,8 @@ CONFIG_SPI_DESIGNWARE=y CONFIG_SPI_DW_MMIO=y CONFIG_SPI_SPIDEV=y +CONFIG_PINCTRL_OCELOT=y + CONFIG_GPIO_SYSFS=y CONFIG_POWER_RESET=y -- GitLab From ada770b1e74a77fff2d5f539bf6c42c25f4784db Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Apr 2019 11:08:40 -0700 Subject: [PATCH 0646/1861] xtensa: fix return_address return_address returns the address that is one level higher in the call stack than requested in its argument, because level 0 corresponds to its caller's return address. Use requested level as the number of stack frames to skip. This fixes the address reported by might_sleep and friends. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 174c11f13bba3..b9f82510c6501 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -253,10 +253,14 @@ static int return_address_cb(struct stackframe *frame, void *data) return 1; } +/* + * level == 0 is for the return address from the caller of this function, + * not from this function itself. + */ unsigned long return_address(unsigned level) { struct return_addr_data r = { - .skip = level + 1, + .skip = level, }; walk_stackframe(stack_pointer(NULL), return_address_cb, &r); return r.addr; -- GitLab From bcb44433bba5eaff293888ef22ffa07f1f0347d6 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 3 Apr 2019 12:23:11 -0400 Subject: [PATCH 0647/1861] dm: disable DISCARD if the underlying storage no longer supports it Storage devices which report supporting discard commands like WRITE_SAME_16 with unmap, but reject discard commands sent to the storage device. This is a clear storage firmware bug but it doesn't change the fact that should a program cause discards to be sent to a multipath device layered on this buggy storage, all paths can end up failed at the same time from the discards, causing possible I/O loss. The first discard to a path will fail with Illegal Request, Invalid field in cdb, e.g.: kernel: sd 8:0:8:19: [sdfn] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE kernel: sd 8:0:8:19: [sdfn] tag#0 Sense Key : Illegal Request [current] kernel: sd 8:0:8:19: [sdfn] tag#0 Add. 
Sense: Invalid field in cdb kernel: sd 8:0:8:19: [sdfn] tag#0 CDB: Write same(16) 93 08 00 00 00 00 00 a0 08 00 00 00 80 00 00 00 kernel: blk_update_request: critical target error, dev sdfn, sector 10487808 The SCSI layer converts this to the BLK_STS_TARGET error number, the sd device disables its support for discard on this path, and because of the BLK_STS_TARGET error multipath fails the discard without failing any path or retrying down a different path. But subsequent discards can cause path failures. Any discards sent to the path which already failed a discard ends up failing with EIO from blk_cloned_rq_check_limits with an "over max size limit" error since the discard limit was set to 0 by the sd driver for the path. As the error is EIO, this now fails the path and multipath tries to send the discard down the next path. This cycle continues as discards are sent until all paths fail. Fix this by training DM core to disable DISCARD if the underlying storage already did so. Also, fix branching in dm_done() and clone_endio() to reflect the mutually exclussive nature of the IO operations in question. Cc: stable@vger.kernel.org Reported-by: David Jeffery Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-rq.c | 11 +++++++---- drivers/md/dm.c | 20 ++++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 95c6d86ab5e8d..c4ef1fceead6e 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -115,6 +115,7 @@ struct mapped_device { struct srcu_struct io_barrier; }; +void disable_discard(struct mapped_device *md); void disable_write_same(struct mapped_device *md); void disable_write_zeroes(struct mapped_device *md); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 09773636602d3..b66745bd08bbc 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -222,11 +222,14 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped) } if (unlikely(error == BLK_STS_TARGET)) { - if (req_op(clone) == REQ_OP_WRITE_SAME && - !clone->q->limits.max_write_same_sectors) + if (req_op(clone) == REQ_OP_DISCARD && + !clone->q->limits.max_discard_sectors) + disable_discard(tio->md); + else if (req_op(clone) == REQ_OP_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) disable_write_same(tio->md); - if (req_op(clone) == REQ_OP_WRITE_ZEROES && - !clone->q->limits.max_write_zeroes_sectors) + else if (req_op(clone) == REQ_OP_WRITE_ZEROES && + !clone->q->limits.max_write_zeroes_sectors) disable_write_zeroes(tio->md); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 89b04169658d2..043f0761e4a0a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -945,6 +945,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error) } } +void disable_discard(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support DISCARD, disable it */ + limits->max_discard_sectors = 0; + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue); +} + void disable_write_same(struct mapped_device *md) { struct queue_limits *limits = dm_get_queue_limits(md); @@ -970,11 +979,14 @@ static void clone_endio(struct bio *bio) dm_endio_fn endio = tio->ti->type->end_io; if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { - if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bio->bi_disk->queue->limits.max_write_same_sectors) + if (bio_op(bio) == REQ_OP_DISCARD && + !bio->bi_disk->queue->limits.max_discard_sectors) + 
disable_discard(md); + else if (bio_op(bio) == REQ_OP_WRITE_SAME && + !bio->bi_disk->queue->limits.max_write_same_sectors) disable_write_same(md); - if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bio->bi_disk->queue->limits.max_write_zeroes_sectors) + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) disable_write_zeroes(md); } -- GitLab From a5220e7d2e1b13e62c0d5eab3fbfaef401186e3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 13:25:03 -0700 Subject: [PATCH 0648/1861] tools/memory-model: Add support for synchronize_srcu_expedited() Given that synchronize_rcu_expedited() is supported, this commit adds support for synchronize_srcu_expedited(). Signed-off-by: Paul E. McKenney Acked-by: Andrea Parri --- tools/memory-model/linux-kernel.def | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 0c3f0ef486f46..551eeaa389d40 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def @@ -51,6 +51,7 @@ synchronize_rcu_expedited() { __fence{sync-rcu}; } srcu_read_lock(X) __srcu{srcu-lock}(X) srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); } synchronize_srcu(X) { __srcu{sync-srcu}(X); } +synchronize_srcu_expedited(X) { __srcu{sync-srcu}(X); } // Atomic atomic_read(X) READ_ONCE(*X) -- GitLab From d9b8a67b3b95a5c5aae6422b8113adc1c2485f2b Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sun, 3 Mar 2019 15:04:18 +0800 Subject: [PATCH 0649/1861] mtd: cfi: fix deadloop in cfi_cmdset_0002.c do_write_buffer In function do_write_buffer(), in the for loop, there is a case chip_ready() returns 1 while chip_good() returns 0, so it never break the loop. To fix this, chip_good() is enough and it should timeout if it stay bad for a while. Fixes: dfeae1073583("mtd: cfi_cmdset_0002: Change write buffer to check correct value") Signed-off-by: Yi Huaijie Signed-off-by: Liu Jian Reviewed-by: Tokunori Ikegami Signed-off-by: Richard Weinberger --- drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 72428b6bfc474..7b7286b4d81ef 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1876,7 +1876,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, continue; } - if (time_after(jiffies, timeo) && !chip_ready(map, adr)) + /* + * We check "time_after" and "!chip_good" before checking "chip_good" to avoid + * the failure due to scheduling. + */ + if (time_after(jiffies, timeo) && !chip_good(map, adr, datum)) break; if (chip_good(map, adr, datum)) { -- GitLab From 6af1c849dfb1f1d326fbdd157c9bc882b921f450 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Apr 2019 08:44:05 +0000 Subject: [PATCH 0650/1861] aio: use kmem_cache_free() instead of kfree() memory allocated by kmem_cache_alloc() should be freed using kmem_cache_free(), not kfree(). 
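As a hedged sketch of the pairing rule (not the aio code; demo_cachep is a hypothetical cache assumed to be created elsewhere with kmem_cache_create()), objects taken from a dedicated slab cache go back to that same cache:

  #include <linux/slab.h>

  struct demo_obj {
          int val;
  };

  static struct kmem_cache *demo_cachep;  /* hypothetical, created at init */

  static int demo_use_cache(void)
  {
          struct demo_obj *obj;

          obj = kmem_cache_alloc(demo_cachep, GFP_KERNEL);
          if (!obj)
                  return -ENOMEM;

          obj->val = 1;                   /* ... use the object ... */

          kmem_cache_free(demo_cachep, obj);      /* not kfree(obj) */
          return 0;
  }

kfree() on a slab-cache object may appear to work with some allocators, but it bypasses the cache's own handling and accounting and is wrong by API contract.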
Fixes: fa0ca2aee3be ("deal with get_reqs_available() in aio_get_req() itself") Signed-off-by: Wei Yongjun Signed-off-by: Al Viro --- fs/aio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index 7ccecaab487a1..3490d1fa0e16f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1034,7 +1034,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) return NULL; if (unlikely(!get_reqs_available(ctx))) { - kfree(req); + kmem_cache_free(kiocb_cachep, req); return NULL; } -- GitLab From 0a89eb92d8c335da92bd5f54d8463b87dd440d45 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 2 Apr 2019 15:06:12 -0700 Subject: [PATCH 0651/1861] vlan: conditional inclusion of FCoE hooks to match netdevice.h and bnx2x Way back in 3c9c36bcedd426f2be2826da43e5163de61735f7 the ndo_fcoe_get_wwn pointer was switched from depending on CONFIG_FCOE to CONFIG_LIBFCOE in order to allow building FCoE support into the bnx2x driver and used by bnx2fc without including the generic software fcoe module. But, FCoE is generally used over an 802.1q VLAN, and the implementation of ndo_fcoe_get_wwn in the 8021q module was not similarly changed. The result is that if CONFIG_FCOE is disabled, then bnz2fc cannot make a call to ndo_fcoe_get_wwn through the 8021q interface to the underlying bnx2x interface. The bnx2fc driver then falls back to a potentially different mapping of Ethernet MAC to Fibre Channel WWN, creating an incompatibility with the fabric and target configurations when compared to the WWNs used by pre-boot firmware and differently-configured kernels. So make the conditional inclusion of FCoE code in 8021q match the conditional inclusion in netdevice.h Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 15293c2a5dd82..8d77b6ee4477d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -443,27 +443,29 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) return rc; } -static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = -EINVAL; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); - if (ops->ndo_fcoe_get_wwn) - rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } +#endif -static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, - struct scatterlist *sgl, unsigned int sgc) +#ifdef NETDEV_FCOE_WWNN +static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; - int rc = 0; - - if (ops->ndo_fcoe_ddp_target) - rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + int rc = -EINVAL; + if (ops->ndo_fcoe_get_wwn) + rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } #endif @@ -794,9 +796,11 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, - .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif +#ifdef NETDEV_FCOE_WWNN + .ndo_fcoe_get_wwn = 
vlan_dev_fcoe_get_wwn, +#endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = vlan_dev_poll_controller, .ndo_netpoll_setup = vlan_dev_netpoll_setup, -- GitLab From cc5a726c79158bd307150e8d4176ec79b52001ea Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 3 Apr 2019 17:30:14 +0530 Subject: [PATCH 0652/1861] libcxgb: fix incorrect ppmax calculation BITS_TO_LONGS() uses DIV_ROUND_UP(); because of this, the ppmax value can be greater than the number of available per-CPU page pods. This patch removes BITS_TO_LONGS() to fix the issue. Signed-off-by: Varun Prakash Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c index 74849be5f004f..e2919005ead3e 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -354,7 +354,10 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, ppmax = max; /* pool size must be multiple of unsigned long */ - bmap = BITS_TO_LONGS(ppmax); + bmap = ppmax / BITS_PER_TYPE(unsigned long); + if (!bmap) + return NULL; + ppmax = (bmap * sizeof(unsigned long)) << 3; alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; @@ -402,6 +405,10 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, if (reserve_factor) { ppmax_pool = ppmax / reserve_factor; pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); + if (!pool) { + ppmax_pool = 0; + reserve_factor = 0; + } pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", ndev->name, ppmax, ppmax_pool, pool_index_max); -- GitLab From 1515a63fc413f160d20574ab0894e7f1020c7be2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 3 Apr 2019 23:27:24 +0300 Subject: [PATCH 0653/1861] net: bridge: always clear mcast matching struct on reports and leaves We need to be careful and always zero the whole br_ip struct when it is used for matching since the rhashtable change. This patch fixes all the places which didn't properly clear it, which in turn might have caused mismatches. Thanks for the great bug report with reproducing steps and bisection. Steps to reproduce (from the bug report): ip link add br0 type bridge mcast_querier 1 ip link set br0 up ip link add v2 type veth peer name v3 ip link set v2 master br0 ip link set v2 up ip link set v3 up ip addr add 3.0.0.2/24 dev v3 ip netns add test ip link add v1 type veth peer name v1 netns test ip link set v1 master br0 ip link set v1 up ip -n test link set v1 up ip -n test addr add 3.0.0.1/24 dev v1 # Multicast receiver ip netns exec test socat UDP4-RECVFROM:5588,ip-add-membership=224.224.224.224:3.0.0.1,fork - # Multicast sender echo hello | nc -u -s 3.0.0.2 224.224.224.224 5588 Reported-by: liam.mcbirnie@boeing.com Fixes: 19e3a9c90c53 ("net: bridge: convert multicast to generic rhashtable") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a0e369179f6d1..02da21d771c9c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -601,6 +601,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, if (ipv4_is_local_multicast(group)) return 0; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1497,6 +1498,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; @@ -1520,6 +1522,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + memset(&br_group, 0, sizeof(br_group)); br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; -- GitLab From 86baf800de84eb89615c138d368b14bff5ee7d8a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Mar 2019 12:08:57 +0100 Subject: [PATCH 0654/1861] extcon: ptn5150: fix COMPILE_TEST dependencies The PTN5150 dependencies look like they were meant to do the right thing, but they actually should not allow building without I2C for compile testing, as that results in a Kconfig warning and subsequent build failure: WARNING: unmet direct dependencies detected for REGMAP_I2C Depends on [m]: I2C [=m] Selected by [y]: - EXTCON_PTN5150 [=y] && EXTCON [=y] && (I2C [=m] && GPIOLIB [=y] || COMPILE_TEST [=y]) Selected by [m]: - EEPROM_AT24 [=m] && I2C [=m] && SYSFS [=y] - KEYBOARD_CAP11XX [=m] && !UML && INPUT [=y] && INPUT_KEYBOARD [=y] && OF [=y] && I2C [=m] - INPUT_DRV260X_HAPTICS [=m] && !UML && INPUT_MISC [=y] && INPUT [=y] && I2C [=m] && (GPIOLIB [=y] || COMPILE_TEST [=y]) - ... [many others] Add parentheses around the expression so we can compile-test without GPIOLIB but not without I2C. Fixes: 4ed754de2d66 ("extcon: Add support for ptn5150 extcon driver") Signed-off-by: Arnd Bergmann Signed-off-by: Chanwoo Choi --- drivers/extcon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 8e17149655f06..540e8cd16ee6e 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -116,7 +116,7 @@ config EXTCON_PALMAS config EXTCON_PTN5150 tristate "NXP PTN5150 CC LOGIC USB EXTCON support" - depends on I2C && GPIOLIB || COMPILE_TEST + depends on I2C && (GPIOLIB || COMPILE_TEST) select REGMAP_I2C help Say Y here to enable support for USB peripheral and USB host -- GitLab From bb9bd814ebf04f579be466ba61fc922625508807 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 4 Apr 2019 16:37:53 +0200 Subject: [PATCH 0655/1861] ipv6: sit: reset ip header pointer in ipip6_rcv ipip6 tunnels run iptunnel_pull_header on received skbs. 
This can cause the following use-after-free when accessing the iph pointer, since the packet will be 'uncloned' by pskb_expand_head() if it is a cloned gso skb (e.g. if the packet has been sent through a veth device) [ 706.369655] BUG: KASAN: use-after-free in ipip6_rcv+0x1678/0x16e0 [sit] [ 706.449056] Read of size 1 at addr ffffe01b6bd855f5 by task ksoftirqd/1/= [ 706.669494] Hardware name: HPE ProLiant m400 Server/ProLiant m400 Server, BIOS U02 08/19/2016 [ 706.771839] Call trace: [ 706.801159] dump_backtrace+0x0/0x2f8 [ 706.845079] show_stack+0x24/0x30 [ 706.884833] dump_stack+0xe0/0x11c [ 706.925629] print_address_description+0x68/0x260 [ 706.982070] kasan_report+0x178/0x340 [ 707.025995] __asan_report_load1_noabort+0x30/0x40 [ 707.083481] ipip6_rcv+0x1678/0x16e0 [sit] [ 707.132623] tunnel64_rcv+0xd4/0x200 [tunnel4] [ 707.185940] ip_local_deliver_finish+0x3b8/0x988 [ 707.241338] ip_local_deliver+0x144/0x470 [ 707.289436] ip_rcv_finish+0x43c/0x14b0 [ 707.335447] ip_rcv+0x628/0x1138 [ 707.374151] __netif_receive_skb_core+0x1670/0x2600 [ 707.432680] __netif_receive_skb+0x28/0x190 [ 707.482859] process_backlog+0x1d0/0x610 [ 707.529913] net_rx_action+0x37c/0xf68 [ 707.574882] __do_softirq+0x288/0x1018 [ 707.619852] run_ksoftirqd+0x70/0xa8 [ 707.662734] smpboot_thread_fn+0x3a4/0x9e8 [ 707.711875] kthread+0x2c8/0x350 [ 707.750583] ret_from_fork+0x10/0x18 [ 707.811302] Allocated by task 16982: [ 707.854182] kasan_kmalloc.part.1+0x40/0x108 [ 707.905405] kasan_kmalloc+0xb4/0xc8 [ 707.948291] kasan_slab_alloc+0x14/0x20 [ 707.994309] __kmalloc_node_track_caller+0x158/0x5e0 [ 708.053902] __kmalloc_reserve.isra.8+0x54/0xe0 [ 708.108280] __alloc_skb+0xd8/0x400 [ 708.150139] sk_stream_alloc_skb+0xa4/0x638 [ 708.200346] tcp_sendmsg_locked+0x818/0x2b90 [ 708.251581] tcp_sendmsg+0x40/0x60 [ 708.292376] inet_sendmsg+0xf0/0x520 [ 708.335259] sock_sendmsg+0xac/0xf8 [ 708.377096] sock_write_iter+0x1c0/0x2c0 [ 708.424154] new_sync_write+0x358/0x4a8 [ 708.470162] __vfs_write+0xc4/0xf8 [ 708.510950] vfs_write+0x12c/0x3d0 [ 708.551739] ksys_write+0xcc/0x178 [ 708.592533] __arm64_sys_write+0x70/0xa0 [ 708.639593] el0_svc_handler+0x13c/0x298 [ 708.686646] el0_svc+0x8/0xc [ 708.739019] Freed by task 17: [ 708.774597] __kasan_slab_free+0x114/0x228 [ 708.823736] kasan_slab_free+0x10/0x18 [ 708.868703] kfree+0x100/0x3d8 [ 708.905320] skb_free_head+0x7c/0x98 [ 708.948204] skb_release_data+0x320/0x490 [ 708.996301] pskb_expand_head+0x60c/0x970 [ 709.044399] __iptunnel_pull_header+0x3b8/0x5d0 [ 709.098770] ipip6_rcv+0x41c/0x16e0 [sit] [ 709.146873] tunnel64_rcv+0xd4/0x200 [tunnel4] [ 709.200195] ip_local_deliver_finish+0x3b8/0x988 [ 709.255596] ip_local_deliver+0x144/0x470 [ 709.303692] ip_rcv_finish+0x43c/0x14b0 [ 709.349705] ip_rcv+0x628/0x1138 [ 709.388413] __netif_receive_skb_core+0x1670/0x2600 [ 709.446943] __netif_receive_skb+0x28/0x190 [ 709.497120] process_backlog+0x1d0/0x610 [ 709.544169] net_rx_action+0x37c/0xf68 [ 709.589131] __do_softirq+0x288/0x1018 [ 709.651938] The buggy address belongs to the object at ffffe01b6bd85580 which belongs to the cache kmalloc-1024 of size 1024 [ 709.804356] The buggy address is located 117 bytes inside of 1024-byte region [ffffe01b6bd85580, ffffe01b6bd85980) [ 709.946340] The buggy address belongs to the page: [ 710.003824] page:ffff7ff806daf600 count:1 mapcount:0 mapping:ffffe01c4001f600 index:0x0 [ 710.099914] flags: 0xfffff8000000100(slab) [ 710.149059] raw: 0fffff8000000100 dead000000000100 dead000000000200 ffffe01c4001f600 [ 710.242011] raw: 0000000000000000 0000000000380038 00000001ffffffff 0000000000000000 [ 710.334966] page dumped because: kasan: bad access detected Fix it by resetting the iph pointer after iptunnel_pull_header(). Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") Tested-by: Jianlin Shi Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07e21a82ce4cc..b2109b74857d0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -669,6 +669,10 @@ static int ipip6_rcv(struct sk_buff *skb) !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) -- GitLab From bbd669a868bba591ffd38b7bc75a7b361bb54b04 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 4 Apr 2019 18:58:26 -0500 Subject: [PATCH 0656/1861] ibmvnic: Fix completion structure initialization Fix device initialization completion handling for vNIC adapters. Initialize the completion structure on probe and reinitialize when needed. This also fixes a race condition during kdump where the driver can attempt to access the completion struct before it is initialized: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0000000081acbe0 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: ibmvnic(+) ibmveth sunrpc overlay squashfs loop CPU: 19 PID: 301 Comm: systemd-udevd Not tainted 4.18.0-64.el8.ppc64le #1 NIP: c0000000081acbe0 LR: c0000000081ad964 CTR: c0000000081ad900 REGS: c000000027f3f990 TRAP: 0300 Not tainted (4.18.0-64.el8.ppc64le) MSR: 800000010280b033 CR: 28228288 XER: 00000006 CFAR: c000000008008934 DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 1 GPR00: c0000000081ad964 c000000027f3fc10 c0000000095b5800 c0000000221b4e58 GPR04: 0000000000000003 0000000000000001 000049a086918581 00000000000000d4 GPR08: 0000000000000007 0000000000000000 ffffffffffffffe8 d0000000014dde28 GPR12: c0000000081ad900 c000000009a00c00 0000000000000001 0000000000000100 GPR16: 0000000000000038 0000000000000007 c0000000095e2230 0000000000000006 GPR20: 0000000000400140 0000000000000001 c00000000910c880 0000000000000000 GPR24: 0000000000000000 0000000000000006 0000000000000000 0000000000000003 GPR28: 0000000000000001 0000000000000001 c0000000221b4e60 c0000000221b4e58 NIP [c0000000081acbe0] __wake_up_locked+0x50/0x100 LR [c0000000081ad964] complete+0x64/0xa0 Call Trace: [c000000027f3fc10] [c000000027f3fc60] 0xc000000027f3fc60 (unreliable) [c000000027f3fc60] [c0000000081ad964] complete+0x64/0xa0 [c000000027f3fca0] [d0000000014dad58] ibmvnic_handle_crq+0xce0/0x1160 [ibmvnic] [c000000027f3fd50] [d0000000014db270] ibmvnic_tasklet+0x98/0x130 [ibmvnic] [c000000027f3fda0] [c00000000813f334] tasklet_action_common.isra.3+0xc4/0x1a0 [c000000027f3fe00] [c000000008cd13f4] __do_softirq+0x164/0x400 [c000000027f3fef0] [c00000000813ed64] irq_exit+0x184/0x1c0 [c000000027f3ff20] [c0000000080188e8] __do_irq+0xb8/0x210 [c000000027f3ff90] [c00000000802d0a4] call_do_irq+0x14/0x24 [c000000026a5b010] [c000000008018adc] do_IRQ+0x9c/0x130 [c000000026a5b060] [c000000008008ce4] hardware_interrupt_common+0x114/0x120 Signed-off-by: Thomas Falcon Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5ecbb1adcf3b9..51cfe95f3e247 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1885,6 +1885,7 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, */ adapter->state = VNIC_PROBED; + reinit_completion(&adapter->init_done); rc = init_crq_queue(adapter); if (rc) { netdev_err(adapter->netdev, @@ -4625,7 +4626,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) old_num_rx_queues = adapter->req_rx_queues; old_num_tx_queues = adapter->req_tx_queues; - init_completion(&adapter->init_done); + reinit_completion(&adapter->init_done); adapter->init_done_rc = 0; ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -4680,7 +4681,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) adapter->from_passive_init = false; - init_completion(&adapter->init_done); adapter->init_done_rc = 0; ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -4759,6 +4759,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_LIST_HEAD(&adapter->rwi_list); spin_lock_init(&adapter->rwi_lock); + init_completion(&adapter->init_done); adapter->resetting = false; adapter->mac_change_pending = false; -- GitLab From ecae26fae15abb7d433557afbd15467ce1c444f5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 4 Apr 2019 18:42:05 -0700 Subject: [PATCH 0657/1861] xtensa: fix format string warning in init_pmd Use %lu instead of %zu to fix the following warning introduced with recent memblock refactoring: xtensa/mm/mmu.c:36:9: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int' Signed-off-by: Max Filippov --- arch/xtensa/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 2fb7d11722284..03678c4afc39b 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -33,7 +33,7 @@ static void * __init init_pmd(unsigned long vaddr, unsigned long n_pages) pte = memblock_alloc_low(n_pages * sizeof(pte_t), PAGE_SIZE); if (!pte) - panic("%s: Failed to allocate %zu bytes align=%lx\n", + panic("%s: Failed to allocate %lu bytes align=%lx\n", __func__, n_pages * sizeof(pte_t), PAGE_SIZE); for (i = 0; i < n_pages; ++i) -- GitLab From 47c4cc08cb5b34e93ab337b924c5ede77ca3c936 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 4 Apr 2019 17:27:20 +0100 Subject: [PATCH 0658/1861] ASoC: cs35l35: Disable regulators on driver removal The chip's main power supplies VA and VP are enabled during probe but then never disabled, which causes warnings from the regulator framework on driver removal. Fix this by adding a remove callback and disabling the supplies; while doing so, follow best practice and put the chip back into reset as well. 
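As a rough sketch of the balanced pattern this restores (hypothetical driver and field names; the real diff follows):

static int foo_i2c_probe(struct i2c_client *i2c)
{
	struct foo_priv *priv = i2c_get_clientdata(i2c); /* set up elsewhere */
	int ret;

	ret = regulator_bulk_enable(priv->num_supplies, priv->supplies);
	if (ret)
		return ret;
	gpiod_set_value_cansleep(priv->reset_gpio, 1);	/* out of reset */
	return 0;
}

static int foo_i2c_remove(struct i2c_client *i2c)
{
	struct foo_priv *priv = i2c_get_clientdata(i2c);

	/* undo probe: power down and put the chip back into reset */
	regulator_bulk_disable(priv->num_supplies, priv->supplies);
	gpiod_set_value_cansleep(priv->reset_gpio, 0);
	return 0;
}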
Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l35.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c index 9f4a59871cee7..c71696146c5ec 100644 --- a/sound/soc/codecs/cs35l35.c +++ b/sound/soc/codecs/cs35l35.c @@ -1635,6 +1635,16 @@ err: return ret; } +static int cs35l35_i2c_remove(struct i2c_client *i2c_client) +{ + struct cs35l35_private *cs35l35 = i2c_get_clientdata(i2c_client); + + regulator_bulk_disable(cs35l35->num_supplies, cs35l35->supplies); + gpiod_set_value_cansleep(cs35l35->reset_gpio, 0); + + return 0; +} + static const struct of_device_id cs35l35_of_match[] = { {.compatible = "cirrus,cs35l35"}, {}, @@ -1655,6 +1665,7 @@ static struct i2c_driver cs35l35_i2c_driver = { }, .id_table = cs35l35_id, .probe = cs35l35_i2c_probe, + .remove = cs35l35_i2c_remove, }; module_i2c_driver(cs35l35_i2c_driver); -- GitLab From 2e05ddd2c9f8000751d52fcf35b8318da46026bc Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Thu, 4 Apr 2019 17:30:39 -0700 Subject: [PATCH 0659/1861] ASoC: intel: skylake: add remove() callback for component driver Topology is not unloaded in the core during unregister_component() anymore. So, add the remove() callback that will unload the topology. Signed-off-by: Ranjani Sridharan Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 56099db8f86dd..57031b6d4d45e 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1462,9 +1462,16 @@ static int skl_platform_soc_probe(struct snd_soc_component *component) return 0; } +static void skl_pcm_remove(struct snd_soc_component *component) +{ + /* remove topology */ + snd_soc_tplg_component_remove(component, SND_SOC_TPLG_INDEX_ALL); +} + static const struct snd_soc_component_driver skl_component = { .name = "pcm", .probe = skl_platform_soc_probe, + .remove = skl_pcm_remove, .ops = &skl_platform_ops, .pcm_new = skl_pcm_new, .pcm_free = skl_pcm_free, -- GitLab From 7cb9eb106d7a4efab6bcf30ec9503f1d703c77f5 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Mon, 18 Mar 2019 13:59:46 +1100 Subject: [PATCH 0660/1861] KVM: PPC: Book3S HV: Preserve PSSCR FAKE_SUSPEND bit on guest exit There is a hardware bug in some POWER9 processors where a treclaim in fake suspend mode can cause an inconsistency in the XER[SO] bit across the threads of a core, the workaround being to force the core into SMT4 when doing the treclaim. The FAKE_SUSPEND bit (bit 10) in the PSSCR is used to control whether a thread is in fake suspend or real suspend. The important difference here is that thread reconfiguration is blocked in real suspend but not fake suspend mode. When we exit a guest which was in fake suspend mode, we force the core into SMT4 while we do the treclaim in kvmppc_save_tm_hv(). However on the new exit path introduced with the function kvmhv_run_single_vcpu() we restore the host PSSCR before calling kvmppc_save_tm_hv(), which means that if we were in fake suspend mode we put the thread into real suspend mode when we clear the PSSCR[FAKE_SUSPEND] bit. This means that we block thread reconfiguration, and the thread which is trying to get the core into SMT4 before it can do the treclaim spins forever, since it itself is blocking thread reconfiguration. The result is that the core is essentially lost. 
This results in a trace such as: [ 93.512904] CPU: 7 PID: 13352 Comm: qemu-system-ppc Not tainted 5.0.0 #4 [ 93.512905] NIP: c000000000098a04 LR: c0000000000cc59c CTR: 0000000000000000 [ 93.512908] REGS: c000003fffd2bd70 TRAP: 0100 Not tainted (5.0.0) [ 93.512908] MSR: 9000000302883033 CR: 22222444 XER: 00000000 [ 93.512914] CFAR: c000000000098a5c IRQMASK: 3 [ 93.512915] PACATMSCRATCH: 0000000000000001 [ 93.512916] GPR00: 0000000000000001 c000003f6cc1b830 c000000001033100 0000000000000004 [ 93.512928] GPR04: 0000000000000004 0000000000000002 0000000000000004 0000000000000007 [ 93.512930] GPR08: 0000000000000000 0000000000000004 0000000000000000 0000000000000004 [ 93.512932] GPR12: c000203fff7fc000 c000003fffff9500 0000000000000000 0000000000000000 [ 93.512935] GPR16: 2000000000300375 000000000000059f 0000000000000000 0000000000000000 [ 93.512951] GPR20: 0000000000000000 0000000000080053 004000000256f41f c000003f6aa88ef0 [ 93.512953] GPR24: c000003f6aa89100 0000000000000010 0000000000000000 0000000000000000 [ 93.512956] GPR28: c000003f9e9a0800 0000000000000000 0000000000000001 c000203fff7fc000 [ 93.512959] NIP [c000000000098a04] pnv_power9_force_smt4_catch+0x1b4/0x2c0 [ 93.512960] LR [c0000000000cc59c] kvmppc_save_tm_hv+0x40/0x88 [ 93.512960] Call Trace: [ 93.512961] [c000003f6cc1b830] [0000000000080053] 0x80053 (unreliable) [ 93.512965] [c000003f6cc1b8a0] [c00800001e9cb030] kvmhv_p9_guest_entry+0x508/0x6b0 [kvm_hv] [ 93.512967] [c000003f6cc1b940] [c00800001e9cba44] kvmhv_run_single_vcpu+0x2dc/0xb90 [kvm_hv] [ 93.512968] [c000003f6cc1ba10] [c00800001e9cc948] kvmppc_vcpu_run_hv+0x650/0xb90 [kvm_hv] [ 93.512969] [c000003f6cc1bae0] [c00800001e8f620c] kvmppc_vcpu_run+0x34/0x48 [kvm] [ 93.512971] [c000003f6cc1bb00] [c00800001e8f2d4c] kvm_arch_vcpu_ioctl_run+0x2f4/0x400 [kvm] [ 93.512972] [c000003f6cc1bb90] [c00800001e8e3918] kvm_vcpu_ioctl+0x460/0x7d0 [kvm] [ 93.512974] [c000003f6cc1bd00] [c0000000003ae2c0] do_vfs_ioctl+0xe0/0x8e0 [ 93.512975] [c000003f6cc1bdb0] [c0000000003aeb24] ksys_ioctl+0x64/0xe0 [ 93.512978] [c000003f6cc1be00] [c0000000003aebc8] sys_ioctl+0x28/0x80 [ 93.512981] [c000003f6cc1be20] [c00000000000b3a4] system_call+0x5c/0x70 [ 93.512983] Instruction dump: [ 93.512986] 419dffbc e98c0000 2e8b0000 38000001 60000000 60000000 60000000 40950068 [ 93.512993] 392bffff 39400000 79290020 39290001 <7d2903a6> 60000000 60000000 7d235214 To fix this we preserve the PSSCR[FAKE_SUSPEND] bit until we call kvmppc_save_tm_hv() which will mean the core can get into SMT4 and perform the treclaim. Note kvmppc_save_tm_hv() clears the PSSCR[FAKE_SUSPEND] bit again so there is no need to explicitly do that. 
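The essence of the fix, distilled from the diff below, is a single masked restore: keep FAKE_SUSPEND set while the rest of the host PSSCR is written back, and let kvmppc_save_tm_hv() clear it after the treclaim:

/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_psscr |
      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));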
Fixes: 95a6432ce9038 ("KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests") Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 06964350b97a9..b2b29d4f98428 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3423,7 +3423,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); - mtspr(SPRN_PSSCR, host_psscr); + /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ + mtspr(SPRN_PSSCR, host_psscr | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); mtspr(SPRN_DAWR, host_dawr); -- GitLab From 345077c8e172c255ea0707214303ccd099e5656b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 29 Mar 2019 16:41:13 +1100 Subject: [PATCH 0661/1861] KVM: PPC: Book3S: Protect memslots while validating user address Guest physical to user address translation uses KVM memslots and reading these requires holding the kvm->srcu lock. However recently introduced kvmppc_tce_validate() broke the rule (see the lockdep warning below). This moves srcu_read_lock(&vcpu->kvm->srcu) earlier to protect kvmppc_tce_validate() as well. ============================= WARNING: suspicious RCU usage 5.1.0-rc2-le_nv2_aikATfstn1-p1 #380 Not tainted ----------------------------- include/linux/kvm_host.h:605 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by qemu-system-ppc/8020: #0: 0000000094972fe9 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0xdc/0x850 [kvm] stack backtrace: CPU: 44 PID: 8020 Comm: qemu-system-ppc Not tainted 5.1.0-rc2-le_nv2_aikATfstn1-p1 #380 Call Trace: [c000003fece8f740] [c000000000bcc134] dump_stack+0xe8/0x164 (unreliable) [c000003fece8f790] [c000000000181be0] lockdep_rcu_suspicious+0x130/0x170 [c000003fece8f810] [c0000000000d5f50] kvmppc_tce_to_ua+0x280/0x290 [c000003fece8f870] [c00800001a7e2c78] kvmppc_tce_validate+0x80/0x1b0 [kvm] [c000003fece8f8e0] [c00800001a7e3fac] kvmppc_h_put_tce+0x94/0x3e4 [kvm] [c000003fece8f9a0] [c00800001a8baac4] kvmppc_pseries_do_hcall+0x30c/0xce0 [kvm_hv] [c000003fece8fa10] [c00800001a8bd89c] kvmppc_vcpu_run_hv+0x694/0xec0 [kvm_hv] [c000003fece8fae0] [c00800001a7d95dc] kvmppc_vcpu_run+0x34/0x48 [kvm] [c000003fece8fb00] [c00800001a7d56bc] kvm_arch_vcpu_ioctl_run+0x2f4/0x400 [kvm] [c000003fece8fb90] [c00800001a7c3618] kvm_vcpu_ioctl+0x460/0x850 [kvm] [c000003fece8fd00] [c00000000041c4f4] do_vfs_ioctl+0xe4/0x930 [c000003fece8fdb0] [c00000000041ce04] ksys_ioctl+0xc4/0x110 [c000003fece8fe00] [c00000000041ce78] sys_ioctl+0x28/0x80 [c000003fece8fe20] [c00000000000b5a4] system_call+0x5c/0x70 Fixes: 42de7b9e2167 ("KVM: PPC: Validate TCEs against preregistered memory page sizes", 2018-09-10) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index f02b049737109..f100e331e69b6 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (ret != 
H_SUCCESS) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvmppc_tce_validate(stt, tce); if (ret != H_SUCCESS) - return ret; + goto unlock_exit; dir = iommu_tce_direction(tce); - idx = srcu_read_lock(&vcpu->kvm->srcu); - if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { ret = H_PARAMETER; goto unlock_exit; -- GitLab From ad94dc3a7eb5fa6ff469dbcf401c44b14ad50595 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 3 Apr 2019 07:26:36 +0200 Subject: [PATCH 0662/1861] xen: use struct_size() helper in kzalloc() struct privcmd_buf_vma_private has a zero-sized array at the end (pages), use the new struct_size() helper to determine the proper allocation size and avoid potential type mistakes. Signed-off-by: Andrea Righi Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/privcmd-buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c index de01a6d0059dc..a1c61e351d3f7 100644 --- a/drivers/xen/privcmd-buf.c +++ b/drivers/xen/privcmd-buf.c @@ -140,8 +140,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *), - GFP_KERNEL); + vma_priv = kzalloc(struct_size(vma_priv, pages, count), GFP_KERNEL); if (!vma_priv) return -ENOMEM; -- GitLab From 42d8644bd77dd2d747e004e367cb0c895a606f39 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Apr 2019 18:12:17 +0300 Subject: [PATCH 0663/1861] xen: Prevent buffer overflow in privcmd ioctl The "call" variable comes from the user in privcmd_ioctl_hypercall(). It's an offset into the hypercall_page[] which has (PAGE_SIZE / 32) elements. We need to put an upper bound on it to prevent an out of bounds access. Cc: stable@vger.kernel.org Fixes: 1246ae0bb992 ("xen: add variable hypercall caller") Signed-off-by: Dan Carpenter Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index de6f0d59a24f4..2863c20266558 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -206,6 +206,9 @@ xen_single_call(unsigned int call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM : [thunk_target] "a" (&hypercall_page[call]) -- GitLab From 3df1af984b76bc50cdbedbdd69d3f69192269cfe Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 4 Apr 2019 16:43:42 +0200 Subject: [PATCH 0664/1861] Revert "Documentation/gpu/meson: Remove link to meson_canvas.c" This reverts commit a3f98bb22cbfaaf67717e156f79e2bfeb42d4cac. Patch "Documentation/gpu/meson: Remove link to meson_canvas.c" was incorrectly applied on the wrong branch not containing the fixed commit 2bf6b5b0e374 ("drm/meson: exclusively use the canvas provider module") Acked-by: Sean Paul Signed-off-by: Neil Armstrong Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190404144342.15238-1-narmstrong@baylibre.com --- Documentation/gpu/meson.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/gpu/meson.rst b/Documentation/gpu/meson.rst index b9e2f9aa3bd80..479f6f51a13b0 100644 --- a/Documentation/gpu/meson.rst +++ b/Documentation/gpu/meson.rst @@ -42,6 +42,12 @@ Video Encoder .. 
kernel-doc:: drivers/gpu/drm/meson/meson_venc.c :doc: Video Encoder +Video Canvas Management +======================= + +.. kernel-doc:: drivers/gpu/drm/meson/meson_canvas.c + :doc: Canvas + Video Clocks ============ -- GitLab From cd9063757a227cf31ebf5391ccda2bf583b0806e Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 24 Mar 2019 20:06:09 +0100 Subject: [PATCH 0665/1861] drm/sun4i: DW HDMI: Lower max. supported rate for H6 Currently resolutions with a pixel clock higher than 340 MHz don't work with the H6 HDMI controller. They just produce a blank screen. Limit the maximum pixel clock rate to 340 MHz until scrambling is supported. Cc: stable@vger.kernel.org # 5.0 Fixes: 40bb9d3147b2 ("drm/sun4i: Add support for H6 DW HDMI controller") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190324190609.32721-1-jernej.skrabec@siol.net --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index dc47720c99ba5..39d8509d96a0d 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -48,8 +48,13 @@ static enum drm_mode_status sun8i_dw_hdmi_mode_valid_h6(struct drm_connector *connector, const struct drm_display_mode *mode) { - /* This is max for HDMI 2.0b (4K@60Hz) */ - if (mode->clock > 594000) + /* + * The controller supports a maximum of 594 MHz, which corresponds + * to 4K@60Hz 4:4:4 or RGB. However, for frequencies greater than + * 340 MHz scrambling has to be enabled. Because scrambling is + * not yet implemented, just limit to 340 MHz for now. + */ + if (mode->clock > 340000) return MODE_CLOCK_HIGH; return MODE_OK; -- GitLab From 7c21383f3429dd70da39c0c7f1efa12377a47ab6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 4 Apr 2019 14:40:27 -0700 Subject: [PATCH 0666/1861] x86/build: Keep local relocations with ld.lld The LLVM linker (ld.lld) defaults to removing local relocations, which causes KASLR boot failures. ld.bfd and ld.gold already handle this correctly. This adds the explicit instruction "--discard-none" during the link phase. There is no change in output for ld.bfd and ld.gold, but ld.lld now produces an image with all the needed relocations. Signed-off-by: Kees Cook Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Nick Desaulniers Cc: Thomas Gleixner Cc: clang-built-linux@googlegroups.com Cc: x86-ml Link: https://lkml.kernel.org/r/20190404214027.GA7324@beast Link: https://github.com/ClangBuiltLinux/linux/issues/404 --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a587805c6687f..56e748a7679f4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -47,7 +47,7 @@ export REALMODE_CFLAGS export BITS ifdef CONFIG_X86_NEED_RELOCS - LDFLAGS_vmlinux := --emit-relocs + LDFLAGS_vmlinux := --emit-relocs --discard-none endif # -- GitLab From 4df4309587e18a3c91e68138638dcb9d2a968906 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 3 Apr 2019 13:42:30 -0500 Subject: [PATCH 0667/1861] x86/kexec/crash: Use struct_size() in vzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. 
For example: struct foo { int stuff; struct boo entry[]; }; instance = vzalloc(sizeof(struct foo) + count * sizeof(struct boo)); Instead of leaving these open-coded and prone to type mistakes, use the new struct_size() helper: instance = vzalloc(struct_size(instance, entry, count)); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20190403184230.GA5295@embeddedor --- arch/x86/kernel/crash.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 17ffc869cab82..a96ca85848039 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -204,8 +204,7 @@ static struct crash_mem *fill_up_crash_elf_data(void) * another range split. So add extra two slots here. */ nr_ranges += 2; - cmem = vzalloc(sizeof(struct crash_mem) + - sizeof(struct crash_mem_range) * nr_ranges); + cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return NULL; -- GitLab From 4fa5ecda2bf96be7464eb406df8aba9d89260227 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 4 Apr 2019 12:17:35 -0500 Subject: [PATCH 0668/1861] objtool: Add rewind_stack_do_exit() to the noreturn list This fixes the following warning seen on GCC 7.3: arch/x86/kernel/dumpstack.o: warning: objtool: oops_end() falls through to next function show_regs() Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/3418ebf5a5a9f6ed7e80954c741c0b904b67b5dc.1554398240.git.jpoimboe@redhat.com --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5dde107083c60..479196aeb4096 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -165,6 +165,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", + "rewind_stack_do_exit", }; if (func->bind == STB_WEAK) -- GitLab From f6e564354a01dd943ba92c514ddf386080d7baa4 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 5 Apr 2019 11:20:12 +0100 Subject: [PATCH 0669/1861] arm64: Use defines instead of magic numbers Following assembly code is not trivial; make it slightly easier to read by replacing some of the magic numbers with the defines which are already present in sysreg.h. 
Reviewed-by: Dave Martin Signed-off-by: Alexandru Elisei Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 4 ++-- arch/arm64/kernel/head.S | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c5308d01e228c..6212121968713 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -442,8 +442,8 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present */ .macro reset_pmuserenr_el0, tmpreg - mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx \tmpreg, \tmpreg, #8, #4 + mrs \tmpreg, id_aa64dfr0_el1 + sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp \tmpreg, #1 // Skip if no PMU present b.lt 9000f msr pmuserenr_el0, xzr // Disable PMU access from EL0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index eecf7927dab08..f533305849d77 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -505,7 +505,7 @@ ENTRY(el2_setup) * kernel is intended to run at EL2. */ mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #8, #4 + ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else mov x2, xzr #endif @@ -538,7 +538,7 @@ set_hcr: #ifdef CONFIG_ARM_GIC_V3 /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #24, #4 + ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 cbz x0, 3f mrs_s x0, SYS_ICC_SRE_EL2 @@ -564,8 +564,8 @@ set_hcr: #endif /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx x0, x1, #8, #4 + mrs x1, id_aa64dfr0_el1 + sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp x0, #1 b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps @@ -574,7 +574,7 @@ set_hcr: csel x3, xzr, x0, lt // all PMU counters from EL1 /* Statistical profiling */ - ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer + ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cbz x0, 7f // Skip if SPE not present cbnz x2, 6f // VHE? mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, -- GitLab From 212ac181c158c09038c474ba68068be49caecebb Mon Sep 17 00:00:00 2001 From: Zubin Mithra Date: Thu, 4 Apr 2019 14:33:55 -0700 Subject: [PATCH 0670/1861] ALSA: seq: Fix OOB-reads from strlcpy When ioctl calls are made with non-null-terminated userspace strings, strlcpy causes an OOB-read from within strlen. Fix by changing to use strscpy instead. 
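A minimal sketch of the difference (hypothetical buffer names, not taken verbatim from the driver):

#include <linux/string.h>

static void copy_name(char *dst, size_t dst_size, const char *src)
{
	/*
	 * strlcpy(dst, src, dst_size) first computes strlen(src), so on a
	 * source with no NUL terminator it reads past the end of the
	 * source buffer -- the OOB read fixed here.
	 *
	 * strscpy() reads at most dst_size bytes from src and always
	 * NUL-terminates dst (for dst_size > 0), so the read is bounded.
	 */
	strscpy(dst, src, dst_size);
}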
Signed-off-by: Zubin Mithra Reviewed-by: Guenter Roeck Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 7d4640d1fe9fb..38e7deab63847 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1252,7 +1252,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, /* fill the info fields */ if (client_info->name[0]) - strlcpy(client->name, client_info->name, sizeof(client->name)); + strscpy(client->name, client_info->name, sizeof(client->name)); client->filter = client_info->filter; client->event_lost = client_info->event_lost; @@ -1530,7 +1530,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) /* set queue name */ if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); - strlcpy(q->name, info->name, sizeof(q->name)); + strscpy(q->name, info->name, sizeof(q->name)); snd_use_lock_free(&q->use_lock); return 0; @@ -1592,7 +1592,7 @@ static int snd_seq_ioctl_set_queue_info(struct snd_seq_client *client, queuefree(q); return -EPERM; } - strlcpy(q->name, info->name, sizeof(q->name)); + strscpy(q->name, info->name, sizeof(q->name)); queuefree(q); return 0; -- GitLab From 95c5c618fa4349b2ba13aebeabf71911208dfc5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Mar 2019 09:21:37 +0300 Subject: [PATCH 0671/1861] irqchip/irq-ls1x: Missing error code in ls1x_intc_of_init() Currently, when irq_domain_add_linear() fails, the error code does not get set so it returns zero which is wrong. Fix it by setting the appropriate error code. Fixes: 9e543e22e204 ("irqchip: Add driver for Loongson-1 interrupt controller") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Marc Zyngier Cc: Jiaxun Yang Cc: Jason Cooper Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20190329062136.GQ32613@kadam --- drivers/irqchip/irq-ls1x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-ls1x.c b/drivers/irqchip/irq-ls1x.c index 86b72fbd3b45d..353111a104133 100644 --- a/drivers/irqchip/irq-ls1x.c +++ b/drivers/irqchip/irq-ls1x.c @@ -130,6 +130,7 @@ static int __init ls1x_intc_of_init(struct device_node *node, NULL); if (!priv->domain) { pr_err("ls1x-irq: cannot add IRQ domain\n"); + err = -ENOMEM; goto out_iounmap; } -- GitLab From e8458e7afa855317b14915d7b86ab3caceea7eb6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 4 Apr 2019 15:45:12 +0800 Subject: [PATCH 0672/1861] genirq: Initialize request_mutex if CONFIG_SPARSE_IRQ=n When CONFIG_SPARSE_IRQ is disable, the request_mutex in struct irq_desc is not initialized which causes malfunction. 
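For context, a mutex embedded in a structure has no valid initial state until mutex_init() runs; a rough sketch (hypothetical struct, not the real irq_desc):

#include <linux/mutex.h>

struct desc_like {
	struct mutex request_mutex;
	/* ... */
};

static void desc_setup(struct desc_like *d)
{
	/*
	 * Must happen before any mutex_lock(); an uninitialized mutex has
	 * an invalid wait list and no lockdep class, so locking it
	 * misbehaves exactly as described above.
	 */
	mutex_init(&d->request_mutex);
}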
Fixes: 9114014cf4e6 ("genirq: Add mutex to irq desc to serialize request/free_irq()") Signed-off-by: Kefeng Wang Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Marc Zyngier Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190404074512.145533-1-wangkefeng.wang@huawei.com --- kernel/irq/irqdesc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 13539e12cd803..9f8a709337cf8 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -558,6 +558,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); + mutex_init(&desc[i].request_mutex); desc_set_defaults(i, &desc[i], node, NULL, NULL); } return arch_early_irq_init(); -- GitLab From b5b447b6b4e899e0978b95cb42d272978f8c7b4d Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 11 Mar 2019 22:47:51 +0000 Subject: [PATCH 0673/1861] x86/entry: Remove unneeded need_resched() loop Since the enabling and disabling of IRQs within preempt_schedule_irq() is contained in a need_resched() loop, there is no need for the outer architecture specific loop. Signed-off-by: Valentin Schneider Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20190311224752.8337-14-valentin.schneider@arm.com --- arch/x86/entry/entry_32.S | 3 +-- arch/x86/entry/entry_64.S | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index d309f30cf7af8..b1856fe9decfa 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -766,13 +766,12 @@ END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) -.Lneed_resched: cmpl $0, PER_CPU_VAR(__preempt_count) jnz restore_all_kernel testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all_kernel call preempt_schedule_irq - jmp .Lneed_resched + jmp restore_all_kernel END(resume_kernel) #endif diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b6294..e7e270603fe7b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -645,10 +645,9 @@ retint_kernel: /* Check if we need preemption */ btl $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f -0: cmpl $0, PER_CPU_VAR(__preempt_count) + cmpl $0, PER_CPU_VAR(__preempt_count) jnz 1f call preempt_schedule_irq - jmp 0b 1: #endif /* -- GitLab From b35f549df1d7520d37ba1e6d4a8d4df6bd52d136 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 7 Nov 2016 16:26:37 -0500 Subject: [PATCH 0674/1861] syscalls: Remove start and number from syscall_get_arguments() args At Linux Plumbers, Andy Lutomirski approached me and pointed out that the function syscall_get_arguments() as implemented in x86 was horribly written and not optimized for the standard case of passing in 0 and 6 for the starting index and the number of system call arguments to get. When looking at all the users of this function, I discovered that all instances pass in only 0 and 6 for these arguments. Instead of having this function handle different cases that are never used, simply rewrite it to return the first 6 arguments of a system call. This should help the performance of tracing system calls by ptrace, ftrace and perf. 
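The simplified interface is easiest to see at a call site; after this change a caller just does (matching the updated ptrace path in the diff below):

unsigned long args[6];

/* Always fills args[0..5] with the six system call arguments. */
syscall_get_arguments(current, regs, args);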
Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Dominik Brodowski Cc: Dave Martin Cc: "Dmitry V. Levin" Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Paul Burton # MIPS parts Acked-by: Max Filippov # For xtensa changes Acked-by: Will Deacon # For the arm64 bits Reviewed-by: Thomas Gleixner # for x86 Reviewed-by: Dmitry V. Levin Reported-by: Andy Lutomirski Signed-off-by: Steven Rostedt (VMware) --- arch/arc/include/asm/syscall.h | 7 ++- arch/arm/include/asm/syscall.h | 23 ++------- arch/arm64/include/asm/syscall.h | 22 ++------ arch/c6x/include/asm/syscall.h | 41 +++------------ arch/csky/include/asm/syscall.h | 14 ++--- arch/h8300/include/asm/syscall.h | 34 +++---------- arch/hexagon/include/asm/syscall.h | 4 +- arch/ia64/include/asm/syscall.h | 5 +- arch/microblaze/include/asm/syscall.h | 4 +- arch/mips/include/asm/syscall.h | 3 +- arch/mips/kernel/ptrace.c | 2 +- arch/nds32/include/asm/syscall.h | 33 +++--------- arch/nios2/include/asm/syscall.h | 42 +++------------ arch/openrisc/include/asm/syscall.h | 6 +-- arch/parisc/include/asm/syscall.h | 30 +++-------- arch/powerpc/include/asm/syscall.h | 8 ++- arch/riscv/include/asm/syscall.h | 13 ++--- arch/s390/include/asm/syscall.h | 17 ++----- arch/sh/include/asm/syscall_32.h | 26 +++------- arch/sh/include/asm/syscall_64.h | 4 +- arch/sparc/include/asm/syscall.h | 4 +- arch/um/include/asm/syscall-generic.h | 39 +++----------- arch/x86/include/asm/syscall.h | 73 +++++++-------------------- arch/xtensa/include/asm/syscall.h | 16 ++---- include/asm-generic/syscall.h | 11 ++-- include/trace/events/syscalls.h | 2 +- kernel/seccomp.c | 2 +- kernel/trace/trace_syscalls.c | 4 +- lib/syscall.c | 2 +- 29 files changed, 113 insertions(+), 378 deletions(-) diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h index 29de098043064..c7a4201ed62ba 100644 --- a/arch/arc/include/asm/syscall.h +++ b/arch/arc/include/asm/syscall.h @@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { unsigned long *inside_ptregs = &(regs->r0); - inside_ptregs -= i; - - BUG_ON((i + n) > 6); + unsigned int n = 6; + unsigned int i = 0; while (n--) { args[i++] = (*inside_ptregs); diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 06dea6bce293b..db969a2972aed 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -55,29 +55,12 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { 
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - args[0] = regs->ARM_ORIG_r0; - args++; - i++; - n--; - } + args[0] = regs->ARM_ORIG_r0; + args++; - memcpy(args, ®s->ARM_r0 + i, n * sizeof(args[0])); + memcpy(args, ®s->ARM_r0 + 1, 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9d..55b2dab210234 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -65,28 +65,12 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_x0; - args++; - i++; - n--; - } + args[0] = regs->orig_x0; + args++; - memcpy(args, ®s->regs[i], n * sizeof(args[0])); + memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index ae2be315ee9c9..06db3251926b9 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -46,40 +46,15 @@ static inline void syscall_set_return_value(struct task_struct *task, } static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, - unsigned int n, unsigned long *args) + struct pt_regs *regs, + unsigned long *args) { - switch (i) { - case 0: - if (!n--) - break; - *args++ = regs->a4; - case 1: - if (!n--) - break; - *args++ = regs->b4; - case 2: - if (!n--) - break; - *args++ = regs->a6; - case 3: - if (!n--) - break; - *args++ = regs->b6; - case 4: - if (!n--) - break; - *args++ = regs->a8; - case 5: - if (!n--) - break; - *args++ = regs->b8; - case 6: - if (!n--) - break; - default: - BUG(); - } + *args++ = regs->a4; + *args++ = regs->b4; + *args++ = regs->a6; + *args++ = regs->b6; + *args++ = regs->a8; + *args = regs->b8; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index 9a9cd81e66c11..c691fe92edc6e 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -43,17 +43,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - args[0] = regs->orig_a0; - args++; - n--; - } else { - i--; - } - memcpy(args, ®s->a1 + i, n * sizeof(args[0])); + args[0] = regs->orig_a0; + args++; + memcpy(args, ®s->a1, 5 * sizeof(args[0])); } static inline void diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h index 9249904012371..ddd483c6ca95c 100644 --- a/arch/h8300/include/asm/syscall.h 
+++ b/arch/h8300/include/asm/syscall.h @@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - BUG_ON(i + n > 6); - - while (n > 0) { - switch (i) { - case 0: - *args++ = regs->er1; - break; - case 1: - *args++ = regs->er2; - break; - case 2: - *args++ = regs->er3; - break; - case 3: - *args++ = regs->er4; - break; - case 4: - *args++ = regs->er5; - break; - case 5: - *args++ = regs->er6; - break; - } - i++; - n--; - } + *args++ = regs->er1; + *args++ = regs->er2; + *args++ = regs->er3; + *args++ = regs->er4; + *args++ = regs->er5; + *args = regs->er6; } diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h index 4af9c7b6f13af..ae3a1e24fabd7 100644 --- a/arch/hexagon/include/asm/syscall.h +++ b/arch/hexagon/include/asm/syscall.h @@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, &(&regs->r00)[i], n * sizeof(args[0])); + memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0])); } #endif diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 1d0b875fec44f..8204c1ff70ce4 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -63,12 +63,9 @@ extern void ia64_syscall_get_set_arguments(struct task_struct *task, unsigned long *args, int rw); static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - - ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); + ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 220decd605a4a..4b23e44e5c3c0 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -82,9 +82,11 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; + while (n--) *args++ = microblaze_get_syscall_arg(regs, i++); } diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6cf8ffb5367ec..a2b4748655df4 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; int ret; /* O32 ABI syscall() */ diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0057c910bc2f3..3a62f80958e17 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) sd.nr = syscall; sd.arch = syscall_get_arch(); - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) sd.args[i] = args[i]; 
sd.instruction_pointer = KSTK_EIP(current); diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index f7e5e86765fe8..89a6ec8731d8b 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -108,42 +108,21 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * syscall_get_arguments - extract system call parameter values * @task: task of interest, must be blocked * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array filled with argument values * - * Fetches @n arguments to the system call starting with the @i'th argument - * (from 0 through 5). Argument @i is stored in @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Fetches 6 arguments to the system call (from 0 through 5). The first + * argument is stored in @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ #define SYSCALL_MAX_ARGS 6 void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - if (n == 0) - return; - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_r0; - args++; - i++; - n--; - } - - memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0])); + args[0] = regs->orig_r0; + args++; + memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0])); } /** diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 9de220854c4ad..792bd449d8394 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -58,42 +58,14 @@ static inline void syscall_set_return_value(struct task_struct *task, } static inline void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args) + struct pt_regs *regs, unsigned long *args) { - BUG_ON(i + n > 6); - - switch (i) { - case 0: - if (!n--) - break; - *args++ = regs->r4; - case 1: - if (!n--) - break; - *args++ = regs->r5; - case 2: - if (!n--) - break; - *args++ = regs->r6; - case 3: - if (!n--) - break; - *args++ = regs->r7; - case 4: - if (!n--) - break; - *args++ = regs->r8; - case 5: - if (!n--) - break; - *args++ = regs->r9; - case 6: - if (!n--) - break; - default: - BUG(); - } + *args++ = regs->r4; + *args++ = regs->r5; + *args++ = regs->r6; + *args++ = regs->r7; + *args++ = regs->r8; + *args = regs->r9; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 2db9f1cf0694c..72607860cd559 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -56,11 +56,9 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) + unsigned long *args) { - 
BUG_ON(i + n > 6); - - memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0])); + memcpy(args, &regs->gpr[3], 6 * sizeof(args[0])); } static inline void diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h index 8bff1a58c97f1..62a6d477fae01 100644 --- a/arch/parisc/include/asm/syscall.h +++ b/arch/parisc/include/asm/syscall.h @@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk, } static inline void syscall_get_arguments(struct task_struct *tsk, - struct pt_regs *regs, unsigned int i, - unsigned int n, unsigned long *args) + struct pt_regs *regs, + unsigned long *args) { - BUG_ON(i); - - switch (n) { - case 6: - args[5] = regs->gr[21]; - case 5: - args[4] = regs->gr[22]; - case 4: - args[3] = regs->gr[23]; - case 3: - args[2] = regs->gr[24]; - case 2: - args[1] = regs->gr[25]; - case 1: - args[0] = regs->gr[26]; - case 0: - break; - default: - BUG(); - } + args[5] = regs->gr[21]; + args[4] = regs->gr[22]; + args[3] = regs->gr[23]; + args[2] = regs->gr[24]; + args[1] = regs->gr[25]; + args[0] = regs->gr[26]; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 1a0e7a8b1c811..5c9b9dc82b7ea 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { unsigned long val, mask = -1UL; - - BUG_ON(i + n > 6); + unsigned int n = 6; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_32BIT)) mask = 0xffffffff; #endif while (n--) { - if (n == 0 && i == 0) + if (n == 0) val = regs->orig_gpr3; else - val = regs->gpr[3 + i + n]; + val = regs->gpr[3 + n]; args[n] = val & mask; } diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 6ea9e18042331..6adca1804be10 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -72,18 +72,11 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - args[0] = regs->orig_a0; - args++; - n--; - } else { - i--; - } - memcpy(args, &regs->a1 + i, n * sizeof(args[0])); + args[0] = regs->orig_a0; + args++; + memcpy(args, &regs->a1, 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 96f9a9151fde0..ee0b1f6aa36d3 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -56,27 +56,20 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { unsigned long mask = -1UL; + unsigned int n = 6; - /* - * No arguments for this syscall, there's nothing to do. 
- */ - if (!n) - return; - - BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif while (n-- > 0) - if (i + n > 0) - args[n] = regs->gprs[2 + i + n] & mask; - if (i == 0) - args[0] = regs->orig_gpr2 & mask; + if (n > 0) + args[n] = regs->gprs[2 + n] & mask; + + args[0] = regs->orig_gpr2 & mask; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 6e118799831c3..2bf1199a05953 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -48,30 +48,16 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - /* - * Do this simply for now. If we need to start supporting - * fetching arguments from arbitrary indices, this will need some - * extra logic. Presently there are no in-tree users that depend - * on this behaviour. - */ - BUG_ON(i); /* Argument pattern is: R4, R5, R6, R7, R0, R1 */ - switch (n) { - case 6: args[5] = regs->regs[1]; - case 5: args[4] = regs->regs[0]; - case 4: args[3] = regs->regs[7]; - case 3: args[2] = regs->regs[6]; - case 2: args[1] = regs->regs[5]; - case 1: args[0] = regs->regs[4]; - case 0: - break; - default: - BUG(); - } + args[5] = regs->regs[1]; + args[4] = regs->regs[0]; + args[3] = regs->regs[7]; + args[2] = regs->regs[6]; + args[1] = regs->regs[5]; + args[0] = regs->regs[4]; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 43882580c7f99..4e8f6460c703c 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -47,11 +47,9 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, &regs->regs[2 + i], n * sizeof(args[0])); + memcpy(args, &regs->regs[2], 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 053989e3f6a6f..872dfee852d64 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { int zero_extend = 0; unsigned int j; + unsigned int n = 6; #ifdef CONFIG_SPARC64 if (test_tsk_thread_flag(task, TIF_32BIT)) @@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task, #endif for (j = 0; j < n; j++) { - unsigned long val = regs->u_regs[UREG_I0 + i + j]; + unsigned long val = regs->u_regs[UREG_I0 + j]; if (zero_extend) args[j] = (u32) val; diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 9fb9cf8cd39a3..25d00acd1322c 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -53,43 +53,16 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { 
const struct uml_pt_regs *r = &regs->regs; - switch (i) { - case 0: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG1(r); - case 1: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG2(r); - case 2: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG3(r); - case 3: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG4(r); - case 4: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG5(r); - case 5: - if (!n--) - break; - *args++ = UPT_SYSCALL_ARG6(r); - case 6: - if (!n--) - break; - default: - BUG(); - break; - } + *args++ = UPT_SYSCALL_ARG1(r); + *args++ = UPT_SYSCALL_ARG2(r); + *args++ = UPT_SYSCALL_ARG3(r); + *args++ = UPT_SYSCALL_ARG4(r); + *args++ = UPT_SYSCALL_ARG5(r); + *args = UPT_SYSCALL_ARG6(r); } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af2..8dbb5c379450b 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(args, &regs->bx + i, n * sizeof(args[0])); + memcpy(args, &regs->bx, 6 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, @@ -116,63 +114,26 @@ static inline int syscall_get_arch(void) static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - *args++ = regs->bx; - case 1: - if (!n--) break; - *args++ = regs->cx; - case 2: - if (!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->si; - case 4: - if (!n--) break; - *args++ = regs->di; - case 5: - if (!n--) break; - *args++ = regs->bp; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + *args++ = regs->bx; + *args++ = regs->cx; + *args++ = regs->dx; + *args++ = regs->si; + *args++ = regs->di; + *args = regs->bp; + } else # endif - switch (i) { - case 0: - if (!n--) break; - *args++ = regs->di; - case 1: - if (!n--) break; - *args++ = regs->si; - case 2: - if (!n--) break; - *args++ = regs->dx; - case 3: - if (!n--) break; - *args++ = regs->r10; - case 4: - if (!n--) break; - *args++ = regs->r8; - case 5: - if (!n--) break; - *args++ = regs->r9; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + *args++ = regs->di; + *args++ = regs->si; + *args++ = regs->dx; + *args++ = regs->r10; + *args++ = regs->r8; + *args = regs->r9; + } } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index a168bf81c7f47..1504ce9a233ad 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -59,23 +59,13 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int j; - - if (n == 0) - return; - - WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS); + unsigned int i; - for (j = 0; j < n; ++j) { - if (i + j < SYSCALL_MAX_ARGS) - args[j] = regs->areg[reg[i + j]]; - else - 
args[j] = 0; - } + for (i = 0; i < 6; ++i) + args[i] = regs->areg[reg[i]]; } static inline void syscall_set_arguments(struct task_struct *task, diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 0c938a4354f6f..269e9412ef421 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -105,21 +105,16 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * syscall_get_arguments - extract system call parameter values * @task: task of interest, must be blocked * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array filled with argument values * - * Fetches @n arguments to the system call starting with the @i'th argument - * (from 0 through 5). Argument @i is stored in @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Fetches 6 arguments to the system call. First argument is stored in +* @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args); + unsigned long *args); /** * syscall_set_arguments - change system call parameter value diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 44a3259ed4a5b..b6e0cbc2c71f1 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -28,7 +28,7 @@ TRACE_EVENT_FN(sys_enter, TP_fast_assign( __entry->id = id; - syscall_get_arguments(current, regs, 0, 6, __entry->args); + syscall_get_arguments(current, regs, __entry->args); ), TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54a0347ca8128..df27e499956a1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) sd->nr = syscall_get_nr(task, regs); sd->arch = syscall_get_arch(); - syscall_get_arguments(task, regs, 0, 6, args); + syscall_get_arguments(task, regs, args); sd->args[0] = args[0]; sd->args[1] = args[1]; sd->args[2] = args[2]; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e9f5bbbad6d94..fa8fbff736d68 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -348,7 +348,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry = ring_buffer_event_data(event); entry->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); event_trigger_unlock_commit(trace_file, buffer, event, entry, @@ -616,7 +616,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) return; rec->nr = syscall_nr; - syscall_get_arguments(current, regs, 0, 6, args); + syscall_get_arguments(current, regs, args); memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && diff --git a/lib/syscall.c b/lib/syscall.c index e8467e17b9a20..fb328e7ccb089 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -27,7 +27,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info info->data.nr = syscall_get_nr(target, regs); if 
(info->data.nr != -1L) - syscall_get_arguments(target, regs, 0, 6, + syscall_get_arguments(target, regs, (unsigned long *)&info->data.args[0]); put_task_stack(target); -- GitLab From 32d92586629a8b3637a3c9361709818e25f327ad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Mar 2019 20:07:31 -0400 Subject: [PATCH 0675/1861] syscalls: Remove start and number from syscall_set_arguments() args After removing the start and count arguments of syscall_get_arguments() it seems reasonable to remove them from syscall_set_arguments(). Note, as of today, there are no users of syscall_set_arguments(), but we are told that there will be some soon. For now, at least make it consistent with syscall_get_arguments(). Link: http://lkml.kernel.org/r/20190327222014.GA32540@altlinux.org Cc: Oleg Nesterov Cc: Kees Cook Cc: Andy Lutomirski Cc: Dominik Brodowski Cc: Dave Martin Cc: "Dmitry V. Levin" Cc: x86@kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Acked-by: Max Filippov # For xtensa changes Acked-by: Will Deacon # For the arm64 bits Reviewed-by: Thomas Gleixner # for x86 Reviewed-by: Dmitry V. Levin Signed-off-by: Steven Rostedt (VMware) --- arch/arm/include/asm/syscall.h | 22 ++------- arch/arm64/include/asm/syscall.h | 22 ++------- arch/c6x/include/asm/syscall.h | 38 +++------------ arch/csky/include/asm/syscall.h | 14 ++---- arch/ia64/include/asm/syscall.h | 10 ++-- arch/ia64/kernel/ptrace.c | 7 ++- arch/microblaze/include/asm/syscall.h | 4 +- arch/nds32/include/asm/syscall.h | 29 ++--------- arch/nios2/include/asm/syscall.h | 42 +++------------- arch/openrisc/include/asm/syscall.h | 6 +-- arch/powerpc/include/asm/syscall.h | 7 +-- arch/riscv/include/asm/syscall.h | 13 ++--- arch/s390/include/asm/syscall.h | 11 ++--- arch/sh/include/asm/syscall_32.h | 21 +++----- arch/sh/include/asm/syscall_64.h | 4 +- arch/sparc/include/asm/syscall.h | 7 ++- arch/um/include/asm/syscall-generic.h | 39 +++------------ arch/x86/include/asm/syscall.h | 69 +++++++-------------------- arch/xtensa/include/asm/syscall.h | 17 ++----- include/asm-generic/syscall.h | 10 +--- 20 files changed, 88 insertions(+), 304 deletions(-) diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index db969a2972aed..080ce70cab12a 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -65,26 +65,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->ARM_ORIG_r0 = args[0]; - args++; - i++; - n--; - } - - memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0])); + regs->ARM_ORIG_r0 = args[0]; + args++; + + 
memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 55b2dab210234..a179df3674a1a 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -75,26 +75,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_x0 = args[0]; - args++; - i++; - n--; - } - - memcpy(&regs->regs[i], args, n * sizeof(args[0])); + regs->orig_x0 = args[0]; + args++; + + memcpy(&regs->regs[1], args, 5 * sizeof(args[0])); } /* diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index 06db3251926b9..15ba8599858e6 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -59,40 +59,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - switch (i) { - case 0: - if (!n--) - break; - regs->a4 = *args++; - case 1: - if (!n--) - break; - regs->b4 = *args++; - case 2: - if (!n--) - break; - regs->a6 = *args++; - case 3: - if (!n--) - break; - regs->b6 = *args++; - case 4: - if (!n--) - break; - regs->a8 = *args++; - case 5: - if (!n--) - break; - regs->a9 = *args++; - case 6: - if (!n) - break; - default: - BUG(); - } + regs->a4 = *args++; + regs->b4 = *args++; + regs->a6 = *args++; + regs->b6 = *args++; + regs->a8 = *args++; + regs->a9 = *args; } #endif /* __ASM_C6X_SYSCALLS_H */ diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index c691fe92edc6e..bda0a446c63ea 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -52,17 +52,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) + const unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - regs->orig_a0 = args[0]; - args++; - n--; - } else { - i--; - } - memcpy(&regs->a1 + i, args, n * sizeof(regs->a1)); + regs->orig_a0 = args[0]; + args++; + memcpy(&regs->a1, args, 5 * sizeof(regs->a1)); } static inline int diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 8204c1ff70ce4..0d9e7fab4a79f 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -59,23 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task, } extern void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args, int rw); + struct pt_regs *regs, unsigned long *args, int rw); static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { - ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0); + ia64_syscall_get_set_arguments(task, regs, args, 0); } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(i + n > 6); - - 
ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); + ia64_syscall_get_set_arguments(task, regs, args, 1); } static inline int syscall_get_arch(void) diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6d50ede0ed691..bf9c24d9ce84e 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) } void ia64_syscall_get_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - unsigned long *args, int rw) + struct pt_regs *regs, unsigned long *args, int rw) { struct syscall_get_set_args data = { - .i = i, - .n = n, + .i = 0, + .n = 6, .args = args, .regs = regs, .rw = rw, diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 4b23e44e5c3c0..833d3a53dab30 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -93,9 +93,11 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { + unsigned int i = 0; + unsigned int n = 6; + while (n--) microblaze_set_syscall_arg(regs, i++, *args++); } diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 89a6ec8731d8b..671ebd357496c 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -129,39 +129,20 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * syscall_set_arguments - change system call parameter value * @task: task of interest, must be in system call entry tracing * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array of argument values to store * - * Changes @n arguments to the system call starting with the @i'th argument. - * Argument @i gets value @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Changes 6 arguments to the system call. The first argument gets value + * @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. 
*/ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warn("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_r0 = args[0]; - args++; - i++; - n--; - } + regs->orig_r0 = args[0]; + args++; - memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0])); + memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0])); } #endif /* _ASM_NDS32_SYSCALL_H */ diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index 792bd449d8394..d7624ed06efb6 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -69,42 +69,14 @@ static inline void syscall_get_arguments(struct task_struct *task, } static inline void syscall_set_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned int i, unsigned int n, - const unsigned long *args) + struct pt_regs *regs, const unsigned long *args) { - BUG_ON(i + n > 6); - - switch (i) { - case 0: - if (!n--) - break; - regs->r4 = *args++; - case 1: - if (!n--) - break; - regs->r5 = *args++; - case 2: - if (!n--) - break; - regs->r6 = *args++; - case 3: - if (!n--) - break; - regs->r7 = *args++; - case 4: - if (!n--) - break; - regs->r8 = *args++; - case 5: - if (!n--) - break; - regs->r9 = *args++; - case 6: - if (!n) - break; - default: - BUG(); - } + regs->r4 = *args++; + regs->r5 = *args++; + regs->r6 = *args++; + regs->r7 = *args++; + regs->r8 = *args++; + regs->r9 = *args; } #endif diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 72607860cd559..b4ff07c1baed5 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -63,11 +63,9 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) + const unsigned long *args) { - BUG_ON(i + n > 6); - - memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0])); + memcpy(&regs->gpr[3], args, 6 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 5c9b9dc82b7ea..1243045bad2d6 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -86,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0])); + memcpy(&regs->gpr[3], args, 6 * sizeof(args[0])); /* Also copy the first argument into orig_gpr3 */ - if (i == 0 && n > 0) - regs->orig_gpr3 = args[0]; + regs->orig_gpr3 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 6adca1804be10..a3d5273ded7c6 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -81,18 +81,11 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - if (i == 0) { - regs->orig_a0 = args[0]; - args++; - n--; - } 
else { - i--; - } - memcpy(&regs->a1 + i, args, n * sizeof(regs->a1)); + regs->orig_a0 = args[0]; + args++; + memcpy(&regs->a1, args, 5 * sizeof(regs->a1)); } static inline int syscall_get_arch(void) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index ee0b1f6aa36d3..59c3e91f2cdb6 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -74,15 +74,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); + unsigned int n = 6; + while (n-- > 0) - if (i + n > 0) - regs->gprs[2 + i + n] = args[n]; - if (i == 0) - regs->orig_gpr2 = args[0]; + if (n > 0) + regs->gprs[2 + n] = args[n]; + regs->orig_gpr2 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 2bf1199a05953..8c9d7e5e5dcc0 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -62,23 +62,14 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - /* Same note as above applies */ - BUG_ON(i); - - switch (n) { - case 6: regs->regs[1] = args[5]; - case 5: regs->regs[0] = args[4]; - case 4: regs->regs[7] = args[3]; - case 3: regs->regs[6] = args[2]; - case 2: regs->regs[5] = args[1]; - case 1: regs->regs[4] = args[0]; - break; - default: - BUG(); - } + regs->regs[1] = args[5]; + regs->regs[0] = args[4]; + regs->regs[7] = args[3]; + regs->regs[6] = args[2]; + regs->regs[5] = args[1]; + regs->regs[4] = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 4e8f6460c703c..22fad97da0661 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -54,11 +54,9 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - BUG_ON(i + n > 6); - memcpy(&regs->regs[2 + i], args, n * sizeof(args[0])); + memcpy(&regs->regs[2], args, 6 * sizeof(args[0])); } static inline int syscall_get_arch(void) diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 872dfee852d64..4d075434e8164 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - unsigned int j; + unsigned int i; - for (j = 0; j < n; j++) - regs->u_regs[UREG_I0 + i + j] = args[j]; + for (i = 0; i < 6; i++) + regs->u_regs[UREG_I0 + i] = args[i]; } static inline int syscall_get_arch(void) diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h index 25d00acd1322c..98e50c50c12ef 100644 --- a/arch/um/include/asm/syscall-generic.h +++ b/arch/um/include/asm/syscall-generic.h @@ -67,43 +67,16 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const 
unsigned long *args) { struct uml_pt_regs *r = &regs->regs; - switch (i) { - case 0: - if (!n--) - break; - UPT_SYSCALL_ARG1(r) = *args++; - case 1: - if (!n--) - break; - UPT_SYSCALL_ARG2(r) = *args++; - case 2: - if (!n--) - break; - UPT_SYSCALL_ARG3(r) = *args++; - case 3: - if (!n--) - break; - UPT_SYSCALL_ARG4(r) = *args++; - case 4: - if (!n--) - break; - UPT_SYSCALL_ARG5(r) = *args++; - case 5: - if (!n--) - break; - UPT_SYSCALL_ARG6(r) = *args++; - case 6: - if (!n--) - break; - default: - BUG(); - break; - } + UPT_SYSCALL_ARG1(r) = *args++; + UPT_SYSCALL_ARG2(r) = *args++; + UPT_SYSCALL_ARG3(r) = *args++; + UPT_SYSCALL_ARG4(r) = *args++; + UPT_SYSCALL_ARG5(r) = *args++; + UPT_SYSCALL_ARG6(r) = *args; } /* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8dbb5c379450b..4c305471ec331 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -138,63 +138,26 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { # ifdef CONFIG_IA32_EMULATION - if (task->thread_info.status & TS_COMPAT) - switch (i) { - case 0: - if (!n--) break; - regs->bx = *args++; - case 1: - if (!n--) break; - regs->cx = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->si = *args++; - case 4: - if (!n--) break; - regs->di = *args++; - case 5: - if (!n--) break; - regs->bp = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } - else + if (task->thread_info.status & TS_COMPAT) { + regs->bx = *args++; + regs->cx = *args++; + regs->dx = *args++; + regs->si = *args++; + regs->di = *args++; + regs->bp = *args; + } else # endif - switch (i) { - case 0: - if (!n--) break; - regs->di = *args++; - case 1: - if (!n--) break; - regs->si = *args++; - case 2: - if (!n--) break; - regs->dx = *args++; - case 3: - if (!n--) break; - regs->r10 = *args++; - case 4: - if (!n--) break; - regs->r8 = *args++; - case 5: - if (!n--) break; - regs->r9 = *args++; - case 6: - if (!n--) break; - default: - BUG(); - break; - } + { + regs->di = *args++; + regs->si = *args++; + regs->dx = *args++; + regs->r10 = *args++; + regs->r8 = *args++; + regs->r9 = *args; + } } static inline int syscall_get_arch(void) diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 1504ce9a233ad..91dc06d580603 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -70,24 +70,13 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS; - unsigned int j; - - if (n == 0) - return; - - if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) { - if (i < SYSCALL_MAX_ARGS) - n = SYSCALL_MAX_ARGS - i; - else - return; - } + unsigned int i; - for (j = 0; j < n; ++j) - regs->areg[reg[i + j]] = args[j]; + for (i = 0; i < 6; ++i) + regs->areg[reg[i]] = args[i]; } asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 269e9412ef421..b88239e9efe49 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -120,21 +120,15 @@ void 
syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * syscall_set_arguments - change system call parameter value * @task: task of interest, must be in system call entry tracing * @regs: task_pt_regs() of @task - * @i: argument index [0,5] - * @n: number of arguments; n+i must be [1,6]. * @args: array of argument values to store * - * Changes @n arguments to the system call starting with the @i'th argument. - * Argument @i gets value @args[0], and so on. - * An arch inline version is probably optimal when @i and @n are constants. + * Changes 6 arguments to the system call. + * The first argument gets value @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. - * It's invalid to call this with @i + @n > 6; we only support system calls - * taking up to 6 arguments. */ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args); /** -- GitLab From f0d1762554014ce0ae347b9f0d088f2c157c8c72 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 5 Apr 2019 10:14:58 +0800 Subject: [PATCH 0676/1861] paride/pcd: Fix potential NULL pointer dereference and mem leak Syzkaller reports this: pcd: pcd version 1.07, major 46, nice 0 pcd0: Autoprobe failed pcd: No CD-ROM drive found kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4525 Comm: syz-executor.0 Not tainted 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:pcd_init+0x95c/0x1000 [pcd] Code: c4 ab f7 48 89 d8 48 c1 e8 03 80 3c 28 00 74 08 48 89 df e8 56 a3 da f7 4c 8b 23 49 8d bc 24 80 05 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 74 05 e8 39 a3 da f7 49 8b bc 24 80 05 00 00 e8 cc b2 RSP: 0018:ffff8881e84df880 EFLAGS: 00010202 RAX: 00000000000000b0 RBX: ffffffffc155a088 RCX: ffffffffc1508935 RDX: 0000000000040000 RSI: ffffc900014f0000 RDI: 0000000000000580 RBP: dffffc0000000000 R08: ffffed103ee658b8 R09: ffffed103ee658b8 R10: 0000000000000001 R11: ffffed103ee658b7 R12: 0000000000000000 R13: ffffffffc155a778 R14: ffffffffc155a4a8 R15: 0000000000000003 FS: 00007fe71bee3700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a7334441a8 CR3: 00000001e9674003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? 0xffffffffc1508000 ? 
0xffffffffc1508000 do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe71bee2c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007fe71bee2c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe71bee36bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: pcd(+) paride solos_pci atm ts_fsm rtc_mt6397 mac80211 nhc_mobility nhc_udp nhc_ipv6 nhc_hop nhc_dest nhc_fragment nhc_routing 6lowpan rtc_cros_ec memconsole intel_xhci_usb_role_switch roles rtc_wm8350 usbcore industrialio_triggered_buffer kfifo_buf industrialio asc7621 dm_era dm_persistent_data dm_bufio dm_mod tpm gnss_ubx gnss_serial serdev gnss max2165 cpufreq_dt hid_penmount hid menf21bmc_wdt rc_core n_tracesink ide_gd_mod cdns_csi2tx v4l2_fwnode videodev media pinctrl_lewisburg pinctrl_intel iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 crypto_simd ide_pci_generic piix input_leds cryptd glue_helper psmouse ide_core intel_agp serio_raw intel_gtt ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: bmc150_magn] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace d873691c3cd69f56 ]--- If alloc_disk fails in pcd_init_units, cd->disk will be NULL; however, pcd_detect and pcd_exit do not check this before freeing it, which may result in a NULL pointer dereference. Also, when register_blkdev() fails, blk_cleanup_queue() and blk_mq_free_tag_set() should be called to free resources.
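As a minimal sketch of the guard this fix applies on every teardown path (pcd, struct pcd_unit and PCD_UNITS are the driver's own symbols; the helper name is hypothetical, for illustration only):

        /* Tear down only the units whose alloc_disk() succeeded; units
         * with cd->disk == NULL were never fully set up. */
        static void pcd_teardown_units_sketch(void)
        {
                struct pcd_unit *cd;
                int unit;

                for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
                        if (!cd->disk)
                                continue;
                        blk_cleanup_queue(cd->disk->queue);
                        blk_mq_free_tag_set(&cd->tag_set);
                        put_disk(cd->disk);
                }
        }

The same NULL check guards pcd_detect(), pcd_init() and pcd_exit() in the diff below.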
Reported-by: Hulk Robot Fixes: 81b74ac68c28 ("paride/pcd: cleanup queues when detection fails") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 377a694dc2281..6d415b20fb706 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -314,6 +314,7 @@ static void pcd_init_units(void) disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, 1, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(disk->queue)) { + put_disk(disk); disk->queue = NULL; continue; } @@ -750,6 +751,8 @@ static int pcd_detect(void) printk("%s: No CD-ROM drive found\n", name); for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; blk_cleanup_queue(cd->disk->queue); cd->disk->queue = NULL; blk_mq_free_tag_set(&cd->tag_set); @@ -1010,8 +1013,14 @@ static int __init pcd_init(void) pcd_probe_capabilities(); if (register_blkdev(major, name)) { - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + + blk_cleanup_queue(cd->disk->queue); + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); + } return -EBUSY; } @@ -1032,6 +1041,9 @@ static void __exit pcd_exit(void) int unit; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + if (!cd->disk) + continue; + if (cd->present) { del_gendisk(cd->disk); pi_release(cd->pi); -- GitLab From fd9c40f64c514bdc585a21e2e33fa5f83ca8811b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 Apr 2019 10:08:43 -0700 Subject: [PATCH 0677/1861] block: Revert v5.0 blk_mq_request_issue_directly() changes blk_mq_try_issue_directly() can return BLK_STS*_RESOURCE for requests that have been queued. If that happens when blk_mq_try_issue_directly() is called by the dm-mpath driver then dm-mpath will try to resubmit a request that is already queued and a kernel crash follows. Since it is nontrivial to fix blk_mq_request_issue_directly(), revert the blk_mq_request_issue_directly() changes that went into kernel v5.0. This patch reverts the following commits: * d6a51a97c0b2 ("blk-mq: replace and kill blk_mq_request_issue_directly") # v5.0. * 5b7a6f128aad ("blk-mq: issue directly with bypass 'false' in blk_mq_sched_insert_requests") # v5.0. * 7f556a44e61d ("blk-mq: refactor the code of issue request directly") # v5.0. Cc: Christoph Hellwig Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: James Smart Cc: Dongli Zhang Cc: Laurence Oberman Cc: Reported-by: Laurence Oberman Tested-by: Laurence Oberman Fixes: 7f556a44e61d ("blk-mq: refactor the code of issue request directly") # v5.0. 
Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +- block/blk-mq-sched.c | 8 +-- block/blk-mq.c | 122 ++++++++++++++++++++++--------------------- block/blk-mq.h | 6 +-- 4 files changed, 71 insertions(+), 69 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4673ebe422553..a55389ba87796 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, */ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) { - blk_qc_t unused; - if (blk_cloned_rq_check_limits(q, rq)) return BLK_STS_IOERR; @@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. */ - return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true); + return blk_mq_request_issue_directly(rq, true); } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 40905539afed3..aa6bc5c026438 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, * busy in case of 'none' scheduler, and this way may save * us one extra enqueue & dequeue to sw queue. */ - if (!hctx->dispatch_busy && !e && !run_queue_async) + if (!hctx->dispatch_busy && !e && !run_queue_async) { blk_mq_try_issue_list_directly(hctx, list); - else - blk_mq_insert_requests(hctx, ctx, list); + if (list_empty(list)) + return; + } + blk_mq_insert_requests(hctx, ctx, list); } blk_mq_run_hw_queue(hctx, run_queue_async); diff --git a/block/blk-mq.c b/block/blk-mq.c index ac61bcc67a89e..a9354835cf517 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1801,74 +1801,76 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, +static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie, - bool bypass, bool last) + bool bypass_insert, bool last) { struct request_queue *q = rq->q; bool run_queue = true; - blk_status_t ret = BLK_STS_RESOURCE; - int srcu_idx; - bool force = false; - hctx_lock(hctx, &srcu_idx); /* - * hctx_lock is needed before checking quiesced flag. + * RCU or SRCU read lock is needed before checking quiesced flag. * - * When queue is stopped or quiesced, ignore 'bypass', insert - * and return BLK_STS_OK to caller, and avoid driver to try to - * dispatch again. + * When queue is stopped or quiesced, ignore 'bypass_insert' from + * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller, + * and avoid driver to try to dispatch again. */ - if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) { + if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { run_queue = false; - bypass = false; - goto out_unlock; + bypass_insert = false; + goto insert; } - if (unlikely(q->elevator && !bypass)) - goto out_unlock; + if (q->elevator && !bypass_insert) + goto insert; if (!blk_mq_get_dispatch_budget(hctx)) - goto out_unlock; + goto insert; if (!blk_mq_get_driver_tag(rq)) { blk_mq_put_dispatch_budget(hctx); - goto out_unlock; + goto insert; } - /* - * Always add a request that has been through - *.queue_rq() to the hardware dispatch list. 
- */ - force = true; - ret = __blk_mq_issue_directly(hctx, rq, cookie, last); -out_unlock: + return __blk_mq_issue_directly(hctx, rq, cookie, last); +insert: + if (bypass_insert) + return BLK_STS_RESOURCE; + + blk_mq_request_bypass_insert(rq, run_queue); + return BLK_STS_OK; +} + +static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, blk_qc_t *cookie) +{ + blk_status_t ret; + int srcu_idx; + + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + + hctx_lock(hctx, &srcu_idx); + + ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + blk_mq_request_bypass_insert(rq, true); + else if (ret != BLK_STS_OK) + blk_mq_end_request(rq, ret); + + hctx_unlock(hctx, srcu_idx); +} + +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) +{ + blk_status_t ret; + int srcu_idx; + blk_qc_t unused_cookie; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + hctx_lock(hctx, &srcu_idx); + ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); hctx_unlock(hctx, srcu_idx); - switch (ret) { - case BLK_STS_OK: - break; - case BLK_STS_DEV_RESOURCE: - case BLK_STS_RESOURCE: - if (force) { - blk_mq_request_bypass_insert(rq, run_queue); - /* - * We have to return BLK_STS_OK for the DM - * to avoid livelock. Otherwise, we return - * the real result to indicate whether the - * request is direct-issued successfully. - */ - ret = bypass ? BLK_STS_OK : ret; - } else if (!bypass) { - blk_mq_sched_insert_request(rq, false, - run_queue, false); - } - break; - default: - if (!bypass) - blk_mq_end_request(rq, ret); - break; - } return ret; } @@ -1876,20 +1878,22 @@ out_unlock: void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { - blk_qc_t unused; - blk_status_t ret = BLK_STS_OK; - while (!list_empty(list)) { + blk_status_t ret; struct request *rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); - if (ret == BLK_STS_OK) - ret = blk_mq_try_issue_directly(hctx, rq, &unused, - false, + ret = blk_mq_request_issue_directly(rq, list_empty(list)); + if (ret != BLK_STS_OK) { + if (ret == BLK_STS_RESOURCE || + ret == BLK_STS_DEV_RESOURCE) { + blk_mq_request_bypass_insert(rq, list_empty(list)); - else - blk_mq_sched_insert_request(rq, false, true, false); + break; + } + blk_mq_end_request(rq, ret); + } } /* @@ -1897,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, * the driver there was more coming, but that turned out to * be a lie. 
*/ - if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs) + if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) hctx->queue->mq_ops->commit_rqs(hctx); } @@ -2012,13 +2016,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) data.hctx = same_queue_rq->mq_hctx; trace_block_unplug(q, 1, true); blk_mq_try_issue_directly(data.hctx, same_queue_rq, - &cookie, false, true); + &cookie); } } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && !data.hctx->dispatch_busy)) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true); + blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); diff --git a/block/blk-mq.h b/block/blk-mq.h index d704fc7766f45..423ea88ab6fba 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); -blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie, - bool bypass, bool last); +/* Used by blk_insert_cloned_request() to issue request directly */ +blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last); void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list); -- GitLab From 325aa19598e410672175ed50982f902d4e3f31c5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 25 Mar 2019 11:10:26 -0700 Subject: [PATCH 0678/1861] genirq: Respect IRQCHIP_SKIP_SET_WAKE in irq_chip_set_wake_parent() If a child irqchip calls irq_chip_set_wake_parent() but its parent irqchip has the IRQCHIP_SKIP_SET_WAKE flag set an error is returned. This is inconsistent behaviour vs. set_irq_wake_real() which returns 0 when the irqchip has the IRQCHIP_SKIP_SET_WAKE flag set. It doesn't attempt to walk the chain of parents and set irq wake on any chips that don't have the flag set either. If the intent is to call the .irq_set_wake() callback of the parent irqchip, then we expect irqchip implementations to omit the IRQCHIP_SKIP_SET_WAKE flag and implement an .irq_set_wake() function that calls irq_chip_set_wake_parent(). The problem has been observed on a Qualcomm sdm845 device where set wake fails on any GPIO interrupts after applying work in progress wakeup irq patches to the GPIO driver. The chain of chips looks like this: QCOM GPIO -> QCOM PDC (SKIP) -> ARM GIC (SKIP) The GPIO controllers parent is the QCOM PDC irqchip which in turn has ARM GIC as parent. The QCOM PDC irqchip has the IRQCHIP_SKIP_SET_WAKE flag set, and so does the grandparent ARM GIC. The GPIO driver doesn't know if the parent needs to set wake or not, so it unconditionally calls irq_chip_set_wake_parent() causing this function to return a failure because the parent irqchip (PDC) doesn't have the .irq_set_wake() callback set. Returning 0 instead makes everything work and irqs from the GPIO controller can be configured for wakeup. Make it consistent by returning 0 (success) from irq_chip_set_wake_parent() when a parent chip has IRQCHIP_SKIP_SET_WAKE set. 
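To illustrate the calling pattern the changelog describes, here is a minimal sketch of a child irqchip whose .irq_set_wake simply forwards to its parent (the chip itself is hypothetical; irq_chip_set_wake_parent() and IRQCHIP_SKIP_SET_WAKE are the kernel symbols discussed above):

        /* With this fix, a parent flagged IRQCHIP_SKIP_SET_WAKE makes the
         * forwarded call succeed (return 0) instead of failing, so wake
         * configuration on the child no longer errors out. */
        static struct irq_chip example_child_chip = {
                .name           = "example-child",
                .irq_set_wake   = irq_chip_set_wake_parent,
        };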
[ tglx: Massaged changelog ] Fixes: 08b55e2a9208e ("genirq: Add irqchip_set_wake_parent") Signed-off-by: Stephen Boyd Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: linux-gpio@vger.kernel.org Cc: Lina Iyer Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190325181026.247796-1-swboyd@chromium.org --- kernel/irq/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3faef4a77f710..51128bea3846c 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1449,6 +1449,10 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) { data = data->parent_data; + + if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) + return 0; + if (data->chip->irq_set_wake) return data->chip->irq_set_wake(data, on); -- GitLab From c7084edc3f6d67750f50d4183134c4fb5712a5c8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 5 Apr 2019 15:39:26 +0200 Subject: [PATCH 0679/1861] tty: mark Siemens R3964 line discipline as BROKEN The n_r3964 line discipline driver was written in a different time, when SMP machines were rare, and users were trusted to do the right thing. Since then, the world has moved on but not this code, it has stayed rooted in the past with its lovely hand-crafted list structures and loads of "interesting" race conditions all over the place. After attempting to clean up most of the issues, I just gave up and am now marking the driver as BROKEN so that hopefully someone who has this hardware will show up out of the woodwork (I know you are out there!) and will help with debugging a raft of changes that I had laying around for the code, but was too afraid to commit as odds are they would break things. Many thanks to Jann and Linus for pointing out the initial problems in this codebase, as well as many reviews of my attempts to fix the issues. It was a case of whack-a-mole, and as you can see, the mole won. Reported-by: Jann Horn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/char/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 72866a004f075..466ebd84ad177 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -348,7 +348,7 @@ config XILINX_HWICAP config R3964 tristate "Siemens R3964 line discipline" - depends on TTY + depends on TTY && BROKEN ---help--- This driver allows synchronous communication with devices using the Siemens R3964 packet protocol. Unless you are dealing with special -- GitLab From ede885ecb2cdf8a8dd5367702e3d964ec846a2d5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 19 Mar 2019 15:19:56 -0700 Subject: [PATCH 0680/1861] kvm: svm: fix potential get_num_contig_pages overflow get_num_contig_pages() could potentially overflow int so make its type consistent with its usage. 
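The hazard: npages is unsigned long, so on a 64-bit kernel a large enough pinned region yields a contiguous-page count that no longer fits in an int. A tiny self-contained sketch of the truncation (userspace C with hypothetical values, purely illustrative):

        #include <stdio.h>

        int main(void)
        {
                unsigned long npages = 1UL << 32;  /* hypothetical huge region */
                int narrow = npages;               /* truncated on LP64: prints 0 */
                unsigned long wide = npages;       /* value preserved */

                printf("narrow=%d wide=%lu\n", narrow, wide);
                return 0;
        }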
Reported-by: Cfir Cohen Cc: stable@vger.kernel.org Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 426039285fd1f..947c35a1e5459 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6422,11 +6422,11 @@ e_free: return ret; } -static int get_num_contig_pages(int idx, struct page **inpages, - unsigned long npages) +static unsigned long get_num_contig_pages(unsigned long idx, + struct page **inpages, unsigned long npages) { unsigned long paddr, next_paddr; - int i = idx + 1, pages = 1; + unsigned long i = idx + 1, pages = 1; /* find the number of contiguous pages starting from idx */ paddr = __sme_page_pa(inpages[idx]); @@ -6445,12 +6445,12 @@ static int get_num_contig_pages(int idx, struct page **inpages, static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) { - unsigned long vaddr, vaddr_end, next_vaddr, npages, size; + unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_launch_update_data params; struct sev_data_launch_update_data *data; struct page **inpages; - int i, ret, pages; + int ret; if (!sev_guest(kvm)) return -ENOTTY; -- GitLab From b86bc2858b389255cd44555ce4b1e427b2b770c0 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Mar 2019 11:47:31 -0700 Subject: [PATCH 0681/1861] KVM: SVM: prevent DBG_DECRYPT and DBG_ENCRYPT overflow This ensures that the address and length provided to DBG_DECRYPT and DBG_ENCRYPT do not cause an overflow. At the same time, pass the actual number of pages pinned in memory to sev_unpin_memory() as a cleanup. Reported-by: Cfir Cohen Signed-off-by: David Rientjes Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 947c35a1e5459..e0a791c3d4fcc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6799,7 +6799,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; - int ret, size; + unsigned int size; + int ret; if (!sev_guest(kvm)) return -ENOTTY; @@ -6807,6 +6808,11 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) return -EFAULT; + if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) + return -EINVAL; + if (!debug.dst_uaddr) + return -EINVAL; + vaddr = debug.src_uaddr; size = debug.len; vaddr_end = vaddr + size; @@ -6857,8 +6863,8 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) dst_vaddr, len, &argp->error); - sev_unpin_memory(kvm, src_p, 1); - sev_unpin_memory(kvm, dst_p, 1); + sev_unpin_memory(kvm, src_p, n); + sev_unpin_memory(kvm, dst_p, n); if (ret) goto err; -- GitLab From acff78477b9b4f26ecdf65733a4ed77fe837e9dc Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Mon, 1 Apr 2019 23:55:59 -0700 Subject: [PATCH 0682/1861] KVM: x86: nVMX: close leak of L0's x2APIC MSRs (CVE-2019-3887) The nested_vmx_prepare_msr_bitmap() function doesn't directly guard the x2APIC MSR intercepts with the "virtualize x2APIC mode" MSR. As a result, we discovered the potential for a buggy or malicious L1 to get access to L0's x2APIC MSRs, via an L2, as follows. 1. L1 executes WRMSR(IA32_SPEC_CTRL, 1). 
This causes the spec_ctrl variable, in nested_vmx_prepare_msr_bitmap() to become true. 2. L1 disables "virtualize x2APIC mode" in VMCS12. 3. L1 enables "APIC-register virtualization" in VMCS12. Now, KVM will set VMCS02's x2APIC MSR intercepts from VMCS12, and then set "virtualize x2APIC mode" to 0 in VMCS02. Oops. This patch closes the leak by explicitly guarding VMCS02's x2APIC MSR intercepts with VMCS12's "virtualize x2APIC mode" control. The scenario outlined above and fix prescribed here, were verified with a related patch in kvm-unit-tests titled "Add leak scenario to virt_x2apic_mode_test". Note, it looks like this issue may have been introduced inadvertently during a merge---see 15303ba5d1cd. Signed-off-by: Marc Orr Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 72 ++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 153e539c29c92..897d70e3d291d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -500,6 +500,17 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } +static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) { + int msr; + + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + + msr_bitmap[word] = ~0; + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; + } +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. @@ -541,39 +552,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return false; msr_bitmap_l1 = (unsigned long *)kmap(page); - if (nested_cpu_has_apic_reg_virt(vmcs12)) { - /* - * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it - * just lets the processor take the value from the virtual-APIC page; - * take those 256 bits directly from the L1 bitmap. - */ - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap_l0[word] = msr_bitmap_l1[word]; - msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; - } - } else { - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { - unsigned word = msr / BITS_PER_LONG; - msr_bitmap_l0[word] = ~0; - msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; - } - } - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_TASKPRI), - MSR_TYPE_W); + /* + * To keep the control flow simple, pay eight 8-byte writes (sixteen + * 4-byte writes on 32-bit systems) up front to enable intercepts for + * the x2APIC MSR range and selectively disable them below. + */ + enable_x2apic_msr_intercepts(msr_bitmap_l0); + + if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { + if (nested_cpu_has_apic_reg_virt(vmcs12)) { + /* + * L0 need not intercept reads for MSRs between 0x800 + * and 0x8ff, it just lets the processor take the value + * from the virtual-APIC page; take those 256 bits + * directly from the L1 bitmap. 
+ */ + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + + msr_bitmap_l0[word] = msr_bitmap_l1[word]; + } + } - if (nested_cpu_has_vid(vmcs12)) { - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_EOI), - MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, - X2APIC_MSR(APIC_SELF_IPI), + X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_W); + + if (nested_cpu_has_vid(vmcs12)) { + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_EOI), + MSR_TYPE_W); + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_SELF_IPI), + MSR_TYPE_W); + } } if (spec_ctrl) -- GitLab From c73f4c998e1fd4249b9edfa39e23f4fda2b9b041 Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Mon, 1 Apr 2019 23:56:00 -0700 Subject: [PATCH 0683/1861] KVM: x86: nVMX: fix x2APIC VTPR read intercept Referring to the "VIRTUALIZING MSR-BASED APIC ACCESSES" chapter of the SDM, when "virtualize x2APIC mode" is 1 and "APIC-register virtualization" is 0, a RDMSR of 808H should return the VTPR from the virtual APIC page. However, for nested, KVM currently fails to disable the read intercept for this MSR. This means that a RDMSR exit takes precedence over "virtualize x2APIC mode", and KVM passes through L1's TPR to L2, instead of sourcing the value from L2's virtual APIC page. This patch fixes the issue by disabling the read intercept, in VMCS02, for the VTPR when "APIC-register virtualization" is 0. The issue described above and fix prescribed here, were verified with a related patch in kvm-unit-tests titled "Test VMX's virtualize x2APIC mode w/ nested". Signed-off-by: Marc Orr Reviewed-by: Jim Mattson Fixes: c992384bde84f ("KVM: vmx: speed up MSR bitmap merge") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 897d70e3d291d..7ec9bb1dd7231 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -578,7 +578,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_disable_intercept_for_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_TASKPRI), - MSR_TYPE_W); + MSR_TYPE_R | MSR_TYPE_W); if (nested_cpu_has_vid(vmcs12)) { nested_vmx_disable_intercept_for_msr( -- GitLab From 0925dda5962e9b55e4d38a72eba93858f24bac41 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Mar 2019 10:56:15 +0800 Subject: [PATCH 0684/1861] x86/mm/KASLR: Use only one PUD entry for real mode trampoline The current code builds identity mapping for the real mode trampoline by borrowing page tables from the direct mapping section if KASLR is enabled. It copies present entries of the first PUD table in 4-level paging mode, or the first P4D table in 5-level paging mode. However, there's only a very small area under low 1 MB reserved for the real mode trampoline in reserve_real_mode() so it makes no sense to build up a really large mapping for it. Reduce it to one PUD (1GB) entry. This matches the randomization granularity in 4-level paging mode and allows to change the randomization granularity in 5-level paging mode from 512GB to 1GB later. [ tglx: Massaged changelog and comments ] Signed-off-by: Baoquan He Signed-off-by: Thomas Gleixner Acked-by: Kirill A. 
Shutemov Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: hpa@zytor.com Cc: keescook@chromium.org Cc: thgarnie@google.com Link: https://lkml.kernel.org/r/20190308025616.21440-2-bhe@redhat.com --- arch/x86/mm/kaslr.c | 84 ++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 3f452ffed7e93..97813751340d5 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -147,74 +147,64 @@ void __init kernel_randomize_memory(void) static void __meminit init_trampoline_pud(void) { - unsigned long paddr, paddr_next; + pud_t *pud_page_tramp, *pud, *pud_tramp; + p4d_t *p4d_page_tramp, *p4d, *p4d_tramp; + unsigned long paddr, vaddr; pgd_t *pgd; - pud_t *pud_page, *pud_page_tramp; - int i; pud_page_tramp = alloc_low_page(); + /* + * There are two mappings for the low 1MB area, the direct mapping + * and the 1:1 mapping for the real mode trampoline: + * + * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET + * 1:1 mapping: virt_addr = phys_addr + */ paddr = 0; - pgd = pgd_offset_k((unsigned long)__va(paddr)); - pud_page = (pud_t *) pgd_page_vaddr(*pgd); - - for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { - pud_t *pud, *pud_tramp; - unsigned long vaddr = (unsigned long)__va(paddr); + vaddr = (unsigned long)__va(paddr); + pgd = pgd_offset_k(vaddr); - pud_tramp = pud_page_tramp + pud_index(paddr); - pud = pud_page + pud_index(vaddr); - paddr_next = (paddr & PUD_MASK) + PUD_SIZE; - - *pud_tramp = *pud; - } + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); -} - -static void __meminit init_trampoline_p4d(void) -{ - unsigned long paddr, paddr_next; - pgd_t *pgd; - p4d_t *p4d_page, *p4d_page_tramp; - int i; + pud_tramp = pud_page_tramp + pud_index(paddr); + *pud_tramp = *pud; - p4d_page_tramp = alloc_low_page(); - - paddr = 0; - pgd = pgd_offset_k((unsigned long)__va(paddr)); - p4d_page = (p4d_t *) pgd_page_vaddr(*pgd); - - for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) { - p4d_t *p4d, *p4d_tramp; - unsigned long vaddr = (unsigned long)__va(paddr); + if (pgtable_l5_enabled()) { + p4d_page_tramp = alloc_low_page(); p4d_tramp = p4d_page_tramp + p4d_index(paddr); - p4d = p4d_page + p4d_index(vaddr); - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; - *p4d_tramp = *p4d; - } + set_p4d(p4d_tramp, + __p4d(_KERNPG_TABLE | __pa(pud_page_tramp))); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + } else { + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + } } /* - * Create PGD aligned trampoline table to allow real mode initialization - * of additional CPUs. Consume only 1 low memory page. + * The real mode trampoline, which is required for bootstrapping CPUs + * occupies only a small area under the low 1MB. See reserve_real_mode() + * for details. + * + * If KASLR is disabled the first PGD entry of the direct mapping is copied + * to map the real mode trampoline. + * + * If KASLR is enabled, copy only the PUD which covers the low 1MB + * area. This limits the randomization granularity to 1GB for both 4-level + * and 5-level paging. 
 */ void __meminit init_trampoline(void) { - if (!kaslr_memory_enabled()) { init_trampoline_default(); return; } - if (pgtable_l5_enabled()) - init_trampoline_p4d(); - else - init_trampoline_pud(); + init_trampoline_pud(); } -- GitLab From b569c18434987163a05f05a12cdf6a9975c55ff3 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Mar 2019 10:56:16 +0800 Subject: [PATCH 0685/1861] x86/mm/KASLR: Reduce randomization granularity for 5-level paging to 1GB The current randomization granularity of 5-level paging is 512 GB. The mapping of the real mode trampoline has been reduced to one PUD entry, so there is no restriction anymore. Reduce the granularity to 1GB for 5-level paging mode, which allows better randomization. [ tglx: Massaged changelog ] Signed-off-by: Baoquan He Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: hpa@zytor.com Cc: keescook@chromium.org Cc: thgarnie@google.com Link: https://lkml.kernel.org/r/20190308025616.21440-3-bhe@redhat.com --- arch/x86/mm/kaslr.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 97813751340d5..f6ba2791eeb5f 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -125,10 +125,7 @@ void __init kernel_randomize_memory(void) */ entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); prandom_bytes_state(&rand_state, &rand, sizeof(rand)); - if (pgtable_l5_enabled()) - entropy = (rand % (entropy + 1)) & P4D_MASK; - else - entropy = (rand % (entropy + 1)) & PUD_MASK; + entropy = (rand % (entropy + 1)) & PUD_MASK; vaddr += entropy; *kaslr_regions[i].base = vaddr; @@ -137,10 +134,7 @@ void __init kernel_randomize_memory(void) * randomization alignment. */ vaddr += get_padding(&kaslr_regions[i]); - if (pgtable_l5_enabled()) - vaddr = round_up(vaddr + 1, P4D_SIZE); - else - vaddr = round_up(vaddr + 1, PUD_SIZE); + vaddr = round_up(vaddr + 1, PUD_SIZE); remain_entropy -= entropy; } } -- GitLab From bfe83844987a52dc1f71f757b60523811502dc93 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 28 Mar 2019 16:13:35 +0100 Subject: [PATCH 0686/1861] genirq/timings: Remove variance computation code The variance computation did not provide the expected results, so it will be replaced with a different approach that predicts the next interrupt using an algorithm derived from suffix arrays. There is no good way to transform the variance code to the new algorithm, so for ease of review remove the existing code first.
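[ editor's note: for readers without the removed code in front of them, the deleted predictor was built on an online mean/variance update of roughly this shape (a simplified sketch, not the kernel code, which uses shifts instead of a division): ]

static void online_update(u64 interval, u64 *avg, u64 *variance, u32 *nr)
{
	s64 diff = (s64)(interval - *avg);

	(*nr)++;
	*avg += diff / *nr;				/* running mean */
	*variance += diff * (s64)(interval - *avg);	/* old diff x new diff */
}

An interval was then flagged as an anomaly when diff * diff > 9 * variance, i.e. when it fell more than three standard deviations from the mean, and three consecutive anomalies reset the statistics.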
Signed-off-by: Daniel Lezcano Signed-off-by: Thomas Gleixner Cc: rjw@rjwysocki.net Cc: ulf.hansson@linaro.org Cc: linux-pm@vger.kernel.org Link: https://lkml.kernel.org/r/20190328151336.5316-1-daniel.lezcano@linaro.org --- kernel/irq/timings.c | 252 +------------------------------------------ 1 file changed, 2 insertions(+), 250 deletions(-) diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 1e4cb63a5c822..3cde046a2bc8e 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -8,7 +8,6 @@ #include #include #include -#include #include @@ -19,13 +18,7 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); DEFINE_PER_CPU(struct irq_timings, irq_timings); struct irqt_stat { - u64 next_evt; - u64 last_ts; - u64 variance; - u32 avg; - u32 nr_samples; - int anomalies; - int valid; + u64 next_evt; }; static DEFINE_IDR(irqt_stats); @@ -40,184 +33,6 @@ void irq_timings_disable(void) static_branch_disable(&irq_timing_enabled); } -/** - * irqs_update - update the irq timing statistics with a new timestamp - * - * @irqs: an irqt_stat struct pointer - * @ts: the new timestamp - * - * The statistics are computed online, in other words, the code is - * designed to compute the statistics on a stream of values rather - * than doing multiple passes on the values to compute the average, - * then the variance. The integer division introduces a loss of - * precision but with an acceptable error margin regarding the results - * we would have with the double floating precision: we are dealing - * with nanosec, so big numbers, consequently the mantisse is - * negligeable, especially when converting the time in usec - * afterwards. - * - * The computation happens at idle time. When the CPU is not idle, the - * interrupts' timestamps are stored in the circular buffer, when the - * CPU goes idle and this routine is called, all the buffer's values - * are injected in the statistical model continuying to extend the - * statistics from the previous busy-idle cycle. - * - * The observations showed a device will trigger a burst of periodic - * interrupts followed by one or two peaks of longer time, for - * instance when a SD card device flushes its cache, then the periodic - * intervals occur again. A one second inactivity period resets the - * stats, that gives us the certitude the statistical values won't - * exceed 1x10^9, thus the computation won't overflow. - * - * Basically, the purpose of the algorithm is to watch the periodic - * interrupts and eliminate the peaks. - * - * An interrupt is considered periodically stable if the interval of - * its occurences follow the normal distribution, thus the values - * comply with: - * - * avg - 3 x stddev < value < avg + 3 x stddev - * - * Which can be simplified to: - * - * -3 x stddev < value - avg < 3 x stddev - * - * abs(value - avg) < 3 x stddev - * - * In order to save a costly square root computation, we use the - * variance. For the record, stddev = sqrt(variance). The equation - * above becomes: - * - * abs(value - avg) < 3 x sqrt(variance) - * - * And finally we square it: - * - * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 - * - * (value - avg) x (value - avg) < 9 x variance - * - * Statistically speaking, any values out of this interval is - * considered as an anomaly and is discarded. However, a normal - * distribution appears when the number of samples is 30 (it is the - * rule of thumb in statistics, cf. "30 samples" on Internet). When - * there are three consecutive anomalies, the statistics are resetted. 
- * - */ -static void irqs_update(struct irqt_stat *irqs, u64 ts) -{ - u64 old_ts = irqs->last_ts; - u64 variance = 0; - u64 interval; - s64 diff; - - /* - * The timestamps are absolute time values, we need to compute - * the timing interval between two interrupts. - */ - irqs->last_ts = ts; - - /* - * The interval type is u64 in order to deal with the same - * type in our computation, that prevent mindfuck issues with - * overflow, sign and division. - */ - interval = ts - old_ts; - - /* - * The interrupt triggered more than one second apart, that - * ends the sequence as predictible for our purpose. In this - * case, assume we have the beginning of a sequence and the - * timestamp is the first value. As it is impossible to - * predict anything at this point, return. - * - * Note the first timestamp of the sequence will always fall - * in this test because the old_ts is zero. That is what we - * want as we need another timestamp to compute an interval. - */ - if (interval >= NSEC_PER_SEC) { - memset(irqs, 0, sizeof(*irqs)); - irqs->last_ts = ts; - return; - } - - /* - * Pre-compute the delta with the average as the result is - * used several times in this function. - */ - diff = interval - irqs->avg; - - /* - * Increment the number of samples. - */ - irqs->nr_samples++; - - /* - * Online variance divided by the number of elements if there - * is more than one sample. Normally the formula is division - * by nr_samples - 1 but we assume the number of element will be - * more than 32 and dividing by 32 instead of 31 is enough - * precise. - */ - if (likely(irqs->nr_samples > 1)) - variance = irqs->variance >> IRQ_TIMINGS_SHIFT; - - /* - * The rule of thumb in statistics for the normal distribution - * is having at least 30 samples in order to have the model to - * apply. Values outside the interval are considered as an - * anomaly. - */ - if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { - /* - * After three consecutive anomalies, we reset the - * stats as it is no longer stable enough. - */ - if (irqs->anomalies++ >= 3) { - memset(irqs, 0, sizeof(*irqs)); - irqs->last_ts = ts; - return; - } - } else { - /* - * The anomalies must be consecutives, so at this - * point, we reset the anomalies counter. - */ - irqs->anomalies = 0; - } - - /* - * The interrupt is considered stable enough to try to predict - * the next event on it. - */ - irqs->valid = 1; - - /* - * Online average algorithm: - * - * new_average = average + ((value - average) / count) - * - * The variance computation depends on the new average - * to be computed here first. 
- * - */ - irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); - - /* - * Online variance algorithm: - * - * new_variance = variance + (value - average) x (value - new_average) - * - * Warning: irqs->avg is updated with the line above, hence - * 'interval - irqs->avg' is no longer equal to 'diff' - */ - irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); - - /* - * Update the next event - */ - irqs->next_evt = ts + irqs->avg; -} - /** * irq_timings_next_event - Return when the next event is supposed to arrive * @@ -246,12 +61,6 @@ static void irqs_update(struct irqt_stat *irqs, u64 ts) */ u64 irq_timings_next_event(u64 now) { - struct irq_timings *irqts = this_cpu_ptr(&irq_timings); - struct irqt_stat *irqs; - struct irqt_stat __percpu *s; - u64 ts, next_evt = U64_MAX; - int i, irq = 0; - /* * This function must be called with the local irq disabled in * order to prevent the timings circular buffer to be updated @@ -259,64 +68,7 @@ u64 irq_timings_next_event(u64 now) */ lockdep_assert_irqs_disabled(); - /* - * Number of elements in the circular buffer: If it happens it - * was flushed before, then the number of elements could be - * smaller than IRQ_TIMINGS_SIZE, so the count is used, - * otherwise the array size is used as we wrapped. The index - * begins from zero when we did not wrap. That could be done - * in a nicer way with the proper circular array structure - * type but with the cost of extra computation in the - * interrupt handler hot path. We choose efficiency. - * - * Inject measured irq/timestamp to the statistical model - * while decrementing the counter because we consume the data - * from our circular buffer. - */ - for (i = irqts->count & IRQ_TIMINGS_MASK, - irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); - irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { - - irq = irq_timing_decode(irqts->values[i], &ts); - - s = idr_find(&irqt_stats, irq); - if (s) { - irqs = this_cpu_ptr(s); - irqs_update(irqs, ts); - } - } - - /* - * Look in the list of interrupts' statistics, the earliest - * next event. - */ - idr_for_each_entry(&irqt_stats, s, i) { - - irqs = this_cpu_ptr(s); - - if (!irqs->valid) - continue; - - if (irqs->next_evt <= now) { - irq = i; - next_evt = now; - - /* - * This interrupt mustn't use in the future - * until new events occur and update the - * statistics. - */ - irqs->valid = 0; - break; - } - - if (irqs->next_evt < next_evt) { - irq = i; - next_evt = irqs->next_evt; - } - } - - return next_evt; + return 0; } void irq_timings_free(int irq) -- GitLab From bbba0e7c5cdadb47a91edea1d5cd0caadbbb016f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 28 Mar 2019 16:13:36 +0100 Subject: [PATCH 0687/1861] genirq/timings: Add array suffix computation code The previous approach based on the variance was discarding values from the timings when they were considered as anomalies as stated by the normal law statistical model. However in the interrupt life, there can be multiple anomalies due to the nature of the device generating the interrupts, and most of the time a repeating pattern can be observed, that is particulary true for network, console, MMC or SSD devices. The variance approach missed the patterns and it was only able to deal with the interrupt coming in regular intervals, thus reducing considerably the scope of what is predictable. In order to find out the repeating patterns, the interrupt intervals are grouped in a ilog2 basis to create a suite of numbers with small amplitude. 
Every group contains an exponential moving average of the values belonging to the group. The array suffix, a data structure used for string searching, data compression, etc ..., is built from the suite of numbers and the suffixes are then searched in this suite. The tests showed the algorithm is able to find all repeating patterns, as well as regular interval in less than 1us on x86-i7. Signed-off-by: Daniel Lezcano Signed-off-by: Thomas Gleixner Cc: rjw@rjwysocki.net Cc: ulf.hansson@linaro.org Cc: linux-pm@vger.kernel.org Link: https://lkml.kernel.org/r/20190328151336.5316-2-daniel.lezcano@linaro.org --- kernel/irq/timings.c | 462 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 457 insertions(+), 5 deletions(-) diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 3cde046a2bc8e..90c735da15d00 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include @@ -17,10 +19,6 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); DEFINE_PER_CPU(struct irq_timings, irq_timings); -struct irqt_stat { - u64 next_evt; -}; - static DEFINE_IDR(irqt_stats); void irq_timings_enable(void) @@ -33,6 +31,410 @@ void irq_timings_disable(void) static_branch_disable(&irq_timing_enabled); } +/* + * The main goal of this algorithm is to predict the next interrupt + * occurrence on the current CPU. + * + * Currently, the interrupt timings are stored in a circular array + * buffer every time there is an interrupt, as a tuple: the interrupt + * number and the associated timestamp when the event occurred . + * + * For every interrupt occurring in a short period of time, we can + * measure the elapsed time between the occurrences for the same + * interrupt and we end up with a suite of intervals. The experience + * showed the interrupts are often coming following a periodic + * pattern. + * + * The objective of the algorithm is to find out this periodic pattern + * in a fastest way and use its period to predict the next irq event. + * + * When the next interrupt event is requested, we are in the situation + * where the interrupts are disabled and the circular buffer + * containing the timings is filled with the events which happened + * after the previous next-interrupt-event request. + * + * At this point, we read the circular buffer and we fill the irq + * related statistics structure. After this step, the circular array + * containing the timings is empty because all the values are + * dispatched in their corresponding buffers. + * + * Now for each interrupt, we can predict the next event by using the + * suffix array, log interval and exponential moving average + * + * 1. Suffix array + * + * Suffix array is an array of all the suffixes of a string. It is + * widely used as a data structure for compression, text search, ... + * For instance for the word 'banana', the suffixes will be: 'banana' + * 'anana' 'nana' 'ana' 'na' 'a' + * + * Usually, the suffix array is sorted but for our purpose it is + * not necessary and won't provide any improvement in the context of + * the solved problem where we clearly define the boundaries of the + * search by a max period and min period. + * + * The suffix array will build a suite of intervals of different + * length and will look for the repetition of each suite. If the suite + * is repeating then we have the period because it is the length of + * the suite whatever its position in the buffer. + * + * 2. 
Log interval + * + * We saw the irq timings allow to compute the interval of the + * occurrences for a specific interrupt. We can reasonibly assume the + * longer is the interval, the higher is the error for the next event + * and we can consider storing those interval values into an array + * where each slot in the array correspond to an interval at the power + * of 2 of the index. For example, index 12 will contain values + * between 2^11 and 2^12. + * + * At the end we have an array of values where at each index defines a + * [2^index - 1, 2 ^ index] interval values allowing to store a large + * number of values inside a small array. + * + * For example, if we have the value 1123, then we store it at + * ilog2(1123) = 10 index value. + * + * Storing those value at the specific index is done by computing an + * exponential moving average for this specific slot. For instance, + * for values 1800, 1123, 1453, ... fall under the same slot (10) and + * the exponential moving average is computed every time a new value + * is stored at this slot. + * + * 3. Exponential Moving Average + * + * The EMA is largely used to track a signal for stocks or as a low + * pass filter. The magic of the formula, is it is very simple and the + * reactivity of the average can be tuned with the factors called + * alpha. + * + * The higher the alphas are, the faster the average respond to the + * signal change. In our case, if a slot in the array is a big + * interval, we can have numbers with a big difference between + * them. The impact of those differences in the average computation + * can be tuned by changing the alpha value. + * + * + * -- The algorithm -- + * + * We saw the different processing above, now let's see how they are + * used together. + * + * For each interrupt: + * For each interval: + * Compute the index = ilog2(interval) + * Compute a new_ema(buffer[index], interval) + * Store the index in a circular buffer + * + * Compute the suffix array of the indexes + * + * For each suffix: + * If the suffix is reverse-found 3 times + * Return suffix + * + * Return Not found + * + * However we can not have endless suffix array to be build, it won't + * make sense and it will add an extra overhead, so we can restrict + * this to a maximum suffix length of 5 and a minimum suffix length of + * 2. The experience showed 5 is the majority of the maximum pattern + * period found for different devices. + * + * The result is a pattern finding less than 1us for an interrupt. + * + * Example based on real values: + * + * Example 1 : MMC write/read interrupt interval: + * + * 223947, 1240, 1384, 1386, 1386, + * 217416, 1236, 1384, 1386, 1387, + * 214719, 1241, 1386, 1387, 1384, + * 213696, 1234, 1384, 1386, 1388, + * 219904, 1240, 1385, 1389, 1385, + * 212240, 1240, 1386, 1386, 1386, + * 214415, 1236, 1384, 1386, 1387, + * 214276, 1234, 1384, 1388, ? + * + * For each element, apply ilog2(value) + * + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, ? + * + * Max period of 5, we take the last (max_period * 3) 15 elements as + * we can be confident if the pattern repeats itself three times it is + * a repeating pattern. + * + * 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, ? + * + * Suffixes are: + * + * 1) 8, 15, 8, 8, 8 <- max period + * 2) 8, 15, 8, 8 + * 3) 8, 15, 8 + * 4) 8, 15 <- min period + * + * From there we search the repeating pattern for each suffix. 
+ * + * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 + * | | | | | | | | | | | | | | | + * 8, 15, 8, 8, 8 | | | | | | | | | | + * 8, 15, 8, 8, 8 | | | | | + * 8, 15, 8, 8, 8 + * + * When moving the suffix, we found exactly 3 matches. + * + * The first suffix with period 5 is repeating. + * + * The next event is (3 * max_period) % suffix_period + * + * In this example, the result 0, so the next event is suffix[0] => 8 + * + * However, 8 is the index in the array of exponential moving average + * which was calculated on the fly when storing the values, so the + * interval is ema[8] = 1366 + * + * + * Example 2: + * + * 4, 3, 5, 100, + * 3, 3, 5, 117, + * 4, 4, 5, 112, + * 4, 3, 4, 110, + * 3, 5, 3, 117, + * 4, 4, 5, 112, + * 4, 3, 4, 110, + * 3, 4, 5, 112, + * 4, 3, 4, 110 + * + * ilog2 + * + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4 + * + * Max period 5: + * 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4 + * + * Suffixes: + * + * 1) 0, 0, 4, 0, 0 + * 2) 0, 0, 4, 0 + * 3) 0, 0, 4 + * 4) 0, 0 + * + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 + * | | | | | | X + * 0, 0, 4, 0, 0, | X + * 0, 0 + * + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 + * | | | | | | | | | | | | | | | + * 0, 0, 4, 0, | | | | | | | | | | | + * 0, 0, 4, 0, | | | | | | | + * 0, 0, 4, 0, | | | + * 0 0 4 + * + * Pattern is found 3 times, the remaining is 1 which results from + * (max_period * 3) % suffix_period. This value is the index in the + * suffix arrays. The suffix array for a period 4 has the value 4 + * at index 1. + */ +#define EMA_ALPHA_VAL 64 +#define EMA_ALPHA_SHIFT 7 + +#define PREDICTION_PERIOD_MIN 2 +#define PREDICTION_PERIOD_MAX 5 +#define PREDICTION_FACTOR 4 +#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ +#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ + +struct irqt_stat { + u64 last_ts; + u64 ema_time[PREDICTION_BUFFER_SIZE]; + int timings[IRQ_TIMINGS_SIZE]; + int circ_timings[IRQ_TIMINGS_SIZE]; + int count; +}; + +/* + * Exponential moving average computation + */ +static u64 irq_timings_ema_new(u64 value, u64 ema_old) +{ + s64 diff; + + if (unlikely(!ema_old)) + return value; + + diff = (value - ema_old) * EMA_ALPHA_VAL; + /* + * We can use a s64 type variable to be added with the u64 + * ema_old variable as this one will never have its topmost + * bit set, it will be always smaller than 2^63 nanosec + * interrupt interval (292 years). + */ + return ema_old + (diff >> EMA_ALPHA_SHIFT); +} + +static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) +{ + int i; + + /* + * The buffer contains the suite of intervals, in a ilog2 + * basis, we are looking for a repetition. We point the + * beginning of the search three times the length of the + * period beginning at the end of the buffer. We do that for + * each suffix. + */ + for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) { + + int *begin = &buffer[len - (i * 3)]; + int *ptr = begin; + + /* + * We look if the suite with period 'i' repeat + * itself. If it is truncated at the end, as it + * repeats we can use the period to find out the next + * element. 
+ */ + while (!memcmp(ptr, begin, i * sizeof(*ptr))) { + ptr += i; + if (ptr >= &buffer[len]) + return begin[((i * 3) % i)]; + } + } + + return -1; +} + +static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) +{ + int index, i, period_max, count, start, min = INT_MAX; + + if ((now - irqs->last_ts) >= NSEC_PER_SEC) { + irqs->count = irqs->last_ts = 0; + return U64_MAX; + } + + /* + * As we want to find three times the repetition, we need a + * number of intervals greater or equal to three times the + * maximum period, otherwise we truncate the max period. + */ + period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? + PREDICTION_PERIOD_MAX : irqs->count / 3; + + /* + * If we don't have enough irq timings for this prediction, + * just bail out. + */ + if (period_max <= PREDICTION_PERIOD_MIN) + return U64_MAX; + + /* + * 'count' will depends if the circular buffer wrapped or not + */ + count = irqs->count < IRQ_TIMINGS_SIZE ? + irqs->count : IRQ_TIMINGS_SIZE; + + start = irqs->count < IRQ_TIMINGS_SIZE ? + 0 : (irqs->count & IRQ_TIMINGS_MASK); + + /* + * Copy the content of the circular buffer into another buffer + * in order to linearize the buffer instead of dealing with + * wrapping indexes and shifted array which will be prone to + * error and extremelly difficult to debug. + */ + for (i = 0; i < count; i++) { + int index = (start + i) & IRQ_TIMINGS_MASK; + + irqs->timings[i] = irqs->circ_timings[index]; + min = min_t(int, irqs->timings[i], min); + } + + index = irq_timings_next_event_index(irqs->timings, count, period_max); + if (index < 0) + return irqs->last_ts + irqs->ema_time[min]; + + return irqs->last_ts + irqs->ema_time[index]; +} + +static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) +{ + u64 old_ts = irqs->last_ts; + u64 interval; + int index; + + /* + * The timestamps are absolute time values, we need to compute + * the timing interval between two interrupts. + */ + irqs->last_ts = ts; + + /* + * The interval type is u64 in order to deal with the same + * type in our computation, that prevent mindfuck issues with + * overflow, sign and division. + */ + interval = ts - old_ts; + + /* + * The interrupt triggered more than one second apart, that + * ends the sequence as predictible for our purpose. In this + * case, assume we have the beginning of a sequence and the + * timestamp is the first value. As it is impossible to + * predict anything at this point, return. + * + * Note the first timestamp of the sequence will always fall + * in this test because the old_ts is zero. That is what we + * want as we need another timestamp to compute an interval. + */ + if (interval >= NSEC_PER_SEC) { + irqs->count = 0; + return; + } + + /* + * Get the index in the ema table for this interrupt. The + * PREDICTION_FACTOR increase the interval size for the array + * of exponential average. + */ + index = likely(interval) ? + ilog2((interval >> 10) / PREDICTION_FACTOR) : 0; + + /* + * Store the index as an element of the pattern in another + * circular array. 
+ */ + irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; + + irqs->ema_time[index] = irq_timings_ema_new(interval, + irqs->ema_time[index]); + + irqs->count++; +} + /** * irq_timings_next_event - Return when the next event is supposed to arrive * @@ -61,6 +463,12 @@ void irq_timings_disable(void) */ u64 irq_timings_next_event(u64 now) { + struct irq_timings *irqts = this_cpu_ptr(&irq_timings); + struct irqt_stat *irqs; + struct irqt_stat __percpu *s; + u64 ts, next_evt = U64_MAX; + int i, irq = 0; + /* * This function must be called with the local irq disabled in * order to prevent the timings circular buffer to be updated @@ -68,7 +476,51 @@ u64 irq_timings_next_event(u64 now) */ lockdep_assert_irqs_disabled(); - return 0; + if (!irqts->count) + return next_evt; + + /* + * Number of elements in the circular buffer: If it happens it + * was flushed before, then the number of elements could be + * smaller than IRQ_TIMINGS_SIZE, so the count is used, + * otherwise the array size is used as we wrapped. The index + * begins from zero when we did not wrap. That could be done + * in a nicer way with the proper circular array structure + * type but with the cost of extra computation in the + * interrupt handler hot path. We choose efficiency. + * + * Inject measured irq/timestamp to the pattern prediction + * model while decrementing the counter because we consume the + * data from our circular buffer. + */ + + i = (irqts->count & IRQ_TIMINGS_MASK) - 1; + irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); + + for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { + irq = irq_timing_decode(irqts->values[i], &ts); + s = idr_find(&irqt_stats, irq); + if (s) + irq_timings_store(irq, this_cpu_ptr(s), ts); + } + + /* + * Look in the list of interrupts' statistics, the earliest + * next event. + */ + idr_for_each_entry(&irqt_stats, s, i) { + + irqs = this_cpu_ptr(s); + + ts = __irq_timings_next_event(irqs, i, now); + if (ts <= now) + return now; + + if (ts < next_evt) + next_evt = ts; + } + + return next_evt; } void irq_timings_free(int irq) -- GitLab From 9cde402a59770a0669d895399c13407f63d7d209 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 5 Apr 2019 16:20:47 +0100 Subject: [PATCH 0688/1861] PCI: Add function 1 DMA alias quirk for Marvell 9170 SATA controller There is a Marvell 88SE9170 PCIe SATA controller I found on a board here. Some quick testing with the ARM SMMU enabled reveals that it suffers from the same requester ID mixup problems as the other Marvell chips listed already. Add the PCI vendor/device ID to the list of chips which need the workaround. 
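[ editor's note: for context, these fixups are table entries keyed by PCI vendor and device ID, so extending coverage to a newly identified chip is a one-line addition of this shape; 0x9999 here is a made-up device ID for illustration only: ]

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9999,
			 quirk_dma_func1_alias);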
Signed-off-by: Andre Przywara Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a59ad09ce911d..a077f67fe1dac 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3877,6 +3877,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170, + quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172, quirk_dma_func1_alias); -- GitLab From 4ed319c6ac08e9a28fca7ac188181ac122f4de84 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 5 Apr 2019 15:26:39 -0400 Subject: [PATCH 0689/1861] dm integrity: fix deadlock with overlapping I/O dm-integrity will deadlock if overlapping I/O is issued to it, the bug was introduced by commit 724376a04d1a ("dm integrity: implement fair range locks"). Users rarely use overlapping I/O so this bug went undetected until now. Fix this bug by correcting, likely cut-n-paste, typos in ranges_overlap() and also remove a flawed ranges_overlap() check in remove_range_unlocked(). This condition could leave unprocessed bios hanging on wait_list forever. Cc: stable@vger.kernel.org # v4.19+ Fixes: 724376a04d1a ("dm integrity: implement fair range locks") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 94b865c5ce715..7c678f50aaa37 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -913,7 +913,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2) { return range1->logical_sector < range2->logical_sector + range2->n_sectors && - range2->logical_sector + range2->n_sectors > range2->logical_sector; + range1->logical_sector + range1->n_sectors > range2->logical_sector; } static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting) @@ -959,8 +959,6 @@ static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity struct dm_integrity_range *last_range = list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry); struct task_struct *last_range_task; - if (!ranges_overlap(range, last_range)) - break; last_range_task = last_range->task; list_del(&last_range->wait_entry); if (!add_new_range(ic, last_range, false)) { -- GitLab From d58431eacb226222430940134d97bfd72f292fcd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 5 Apr 2019 11:34:40 +1100 Subject: [PATCH 0690/1861] sunrpc: don't mark uninitialised items as VALID. A recent commit added a call to cache_fresh_locked() when an expired item was found. The call sets the CACHE_VALID flag, so it is important that the item actually is valid. There are two ways it could be valid: 1/ If ->update has been called to fill in relevant content 2/ if CACHE_NEGATIVE is set, to say that content doesn't exist. An expired item that is waiting for an update will be neither. Setting CACHE_VALID will mean that a subsequent call to cache_put() will be likely to dereference uninitialised pointers. 
So we must make sure the item is valid, and we already have code to do that in try_to_negate_entry(). This takes the hash lock and so cannot be used directly, so take out the two lines that we need and use them. Now cache_fresh_locked() is certain to be called only on a valid item. Cc: stable@kernel.org # 2.6.35 Fixes: 4ecd55ea0742 ("sunrpc: fix cache_head leak due to queued request") Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 12bb23b8e0c50..261131dfa1f1b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static inline int cache_is_valid(struct cache_head *h); static void cache_fresh_locked(struct cache_head *head, time_t expiry, struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, @@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init_rcu(&tmp->cache_list); detail->entries --; + if (cache_is_valid(tmp) == -EAGAIN) + set_bit(CACHE_NEGATIVE, &tmp->flags); cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; -- GitLab From 3c86794ac0e6582eea7733619d58ea150198502f Mon Sep 17 00:00:00 2001 From: Murphy Zhou Date: Thu, 4 Apr 2019 14:57:11 +0800 Subject: [PATCH 0691/1861] nfsd/nfsd3_proc_readdir: fix buffer count and page pointers Since commit f875a79 ("nfsd: allow nfsv3 readdir request to be larger"), an NFSv3 readdir request can be larger than PAGE_SIZE, so if the directory being read is large enough we can use multiple pages in rq_respages. Update the buffer count and page pointers, as we already do in readdirplus, to make this happen. Without this, listing a directory with 3000 files panics because we count the wrong way and write to a random page. Fixes: f875a79 "nfsd: allow nfsv3 readdir request to be larger" Signed-off-by: Murphy Zhou Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs3proc.c | 17 +++++++++++++++-- fs/nfsd/nfs3xdr.c | 11 +++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8f933e84cec18..9bc32af4e2daf 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -442,7 +442,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; __be32 nfserr; - int count; + int count = 0; + struct page **p; + caddr_t page_addr = NULL; dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -462,7 +464,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, &resp->common, nfs3svc_encode_entry); memcpy(resp->verf, argp->verf, 8); - resp->count = resp->buffer - argp->buffer; + count = 0; + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); + + if (((caddr_t)resp->buffer >= page_addr) && + ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { + count += (caddr_t)resp->buffer - page_addr; + break; + } + count += PAGE_SIZE; + } + resp->count = count >> 2; if (resp->offset) { loff_t offset = argp->cookie; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 93fea246f676e..8d789124ed3c1 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -573,6 +573,7 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readdirargs *args = rqstp->rq_argp; + int len; u32 max_blocksize = svc_max_payload(rqstp); p = decode_fh(p, &args->fh); @@ -582,8 +583,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - args->count = min_t(u32, args->count, max_blocksize); - args->buffer = page_address(*(rqstp->rq_next_page++)); + len = args->count = min_t(u32, args->count, max_blocksize); + + while (len > 0) { struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) args->buffer = page_address(p); len -= PAGE_SIZE; } return xdr_argsize_check(rqstp, p); } -- GitLab From 5f074f3e192f10c9fade898b9b3b8812e3d83342 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 5 Apr 2019 18:38:45 -0700 Subject: [PATCH 0692/1861] lib/string.c: implement a basic bcmp A recent optimization in Clang (r355672) lowers comparisons of the return value of memcmp against zero to comparisons of the return value of bcmp against zero. This helps some platforms that implement bcmp more efficiently than memcmp. glibc simply aliases bcmp to memcmp, but an optimized implementation is in the works. This results in linkage failures for all targets with Clang due to the undefined symbol. For now, just implement bcmp as a tailcall to memcmp to unbreak the build. This routine can be further optimized in the future. Other ideas discussed: * A weak alias was discussed, but breaks for architectures that define their own implementations of memcmp since aliases to declarations are not permitted (only definitions). Arch-specific memcmp implementations typically declare memcmp in C headers, but implement them in assembly. * -ffreestanding also is used sporadically throughout the kernel. * -fno-builtin-bcmp doesn't work when doing LTO.
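[ editor's note: a usage sketch, not from the patch. bcmp()'s weaker contract only supports equality tests, which is exactly the shape of comparison Clang rewrites: ]

static bool buffers_equal(const void *a, const void *b, size_t len)
{
	return bcmp(a, b, len) == 0;	/* valid: tests only zero vs non-zero */
}

static bool first_is_smaller(const void *a, const void *b, size_t len)
{
	return bcmp(a, b, len) < 0;	/* INVALID: the sign carries no meaning */
}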
Link: https://bugs.llvm.org/show_bug.cgi?id=41035 Link: https://code.woboq.org/userspace/glibc/string/memcmp.c.html#bcmp Link: https://github.com/llvm/llvm-project/commit/8e16d73346f8091461319a7dfc4ddd18eedcff13 Link: https://github.com/ClangBuiltLinux/linux/issues/416 Link: http://lkml.kernel.org/r/20190313211335.165605-1-ndesaulniers@google.com Signed-off-by: Nick Desaulniers Reported-by: Nathan Chancellor Reported-by: Adhemerval Zanella Suggested-by: Arnd Bergmann Suggested-by: James Y Knight Suggested-by: Masahiro Yamada Suggested-by: Nathan Chancellor Suggested-by: Rasmus Villemoes Acked-by: Steven Rostedt (VMware) Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Masahiro Yamada Reviewed-by: Andy Shevchenko Cc: David Laight Cc: Rasmus Villemoes Cc: Namhyung Kim Cc: Greg Kroah-Hartman Cc: Alexander Shishkin Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 3 +++ lib/string.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index 7927b875f80cf..6ab0a6fa512e7 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -150,6 +150,9 @@ extern void * memscan(void *,int,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCMP extern int memcmp(const void *,const void *,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_BCMP +extern int bcmp(const void *,const void *,__kernel_size_t); +#endif #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif diff --git a/lib/string.c b/lib/string.c index 38e4ca08e757c..3ab861c1a857a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -866,6 +866,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) EXPORT_SYMBOL(memcmp); #endif +#ifndef __HAVE_ARCH_BCMP +/** + * bcmp - returns 0 if and only if the buffers have identical contents. + * @a: pointer to first buffer. + * @b: pointer to second buffer. + * @len: size of buffers. + * + * The sign or magnitude of a non-zero return value has no particular + * meaning, and architectures may implement their own more efficient bcmp(). So + * while this particular implementation is a simple (tail) call to memcmp, do + * not rely on anything but whether the return value is zero or non-zero. + */ +#undef bcmp +int bcmp(const void *a, const void *b, size_t len) +{ + return memcmp(a, b, len); +} +EXPORT_SYMBOL(bcmp); +#endif + #ifndef __HAVE_ARCH_MEMSCAN /** * memscan - Find a character in an area of memory. -- GitLab From 298a32b132087550d3fa80641ca58323c5dfd4d9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 5 Apr 2019 18:38:49 -0700 Subject: [PATCH 0693/1861] kmemleak: powerpc: skip scanning holes in the .bss section Commit 2d4f567103ff ("KVM: PPC: Introduce kvm_tmp framework") adds kvm_tmp[] into the .bss section and then free the rest of unused spaces back to the page allocator. kernel_init kvm_guest_init kvm_free_tmp free_reserved_area free_unref_page free_unref_page_prepare With DEBUG_PAGEALLOC=y, it will unmap those pages from kernel. As the result, kmemleak scan will trigger a panic when it scans the .bss section with unmapped pages. This patch creates dedicated kmemleak objects for the .data, .bss and potentially .data..ro_after_init sections to allow partial freeing via the kmemleak_free_part() in the powerpc kvm_free_tmp() function. 
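[ editor's note: the general pattern, as a sketch with hypothetical start/size variables. When part of a kmemleak-scanned region is about to be unmapped or freed, the hole has to be carved out of kmemleak's view first, otherwise a later scan dereferences unmapped pages under DEBUG_PAGEALLOC: ]

/* stop kmemleak from scanning [start, start + size) ... */
kmemleak_free_part(start, size);
/* ... before the pages are handed back to the page allocator */
free_reserved_area(start, start + size, -1, NULL);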
Link: http://lkml.kernel.org/r/20190321171917.62049-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: Qian Cai Acked-by: Michael Ellerman (powerpc) Tested-by: Qian Cai Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Avi Kivity Cc: Paolo Bonzini Cc: Radim Krcmar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/kvm.c | 7 +++++++ mm/kmemleak.c | 16 +++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 683b5b3805bd1..cd381e2291dfe 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -712,6 +713,12 @@ static void kvm_use_magic_page(void) static __init void kvm_free_tmp(void) { + /* + * Inform kmemleak about the hole in the .bss section since the + * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y. + */ + kmemleak_free_part(&kvm_tmp[kvm_tmp_index], + ARRAY_SIZE(kvm_tmp) - kvm_tmp_index); free_reserved_area(&kvm_tmp[kvm_tmp_index], &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL); } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 707fa5579f66f..6c318f5ac234f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1529,11 +1529,6 @@ static void kmemleak_scan(void) } rcu_read_unlock(); - /* data/bss scanning */ - scan_large_block(_sdata, _edata); - scan_large_block(__bss_start, __bss_stop); - scan_large_block(__start_ro_after_init, __end_ro_after_init); - #ifdef CONFIG_SMP /* per-cpu sections scanning */ for_each_possible_cpu(i) @@ -2071,6 +2066,17 @@ void __init kmemleak_init(void) } local_irq_restore(flags); + /* register the data/bss sections */ + create_object((unsigned long)_sdata, _edata - _sdata, + KMEMLEAK_GREY, GFP_ATOMIC); + create_object((unsigned long)__bss_start, __bss_stop - __bss_start, + KMEMLEAK_GREY, GFP_ATOMIC); + /* only register .data..ro_after_init if not within .data */ + if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata) + create_object((unsigned long)__start_ro_after_init, + __end_ro_after_init - __start_ro_after_init, + KMEMLEAK_GREY, GFP_ATOMIC); + /* * This is the point where tracking allocations is safe. Automatic * scanning is started during the late initcall. Add the early logged -- GitLab From 6147e136ff5071609b54f18982dea87706288e21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2019 18:38:53 -0700 Subject: [PATCH 0694/1861] include/linux/bitrev.h: fix constant bitrev clang points out with hundreds of warnings that the bitrev macros have a problem with constant input: drivers/hwmon/sht15.c:187:11: error: variable '__x' is uninitialized when used within its own initialization [-Werror,-Wuninitialized] u8 crc = bitrev8(data->val_status & 0x0F); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/bitrev.h:102:21: note: expanded from macro 'bitrev8' __constant_bitrev8(__x) : \ ~~~~~~~~~~~~~~~~~~~^~~~ include/linux/bitrev.h:67:11: note: expanded from macro '__constant_bitrev8' u8 __x = x; \ ~~~ ^ Both the bitrev and the __constant_bitrev macros use an internal variable named __x, which goes horribly wrong when passing one to the other. The obvious fix is to rename one of the variables, so this adds an extra '_'. 
It seems we got away with this because - there are only a few drivers using bitrev macros - usually there are no constant arguments to those - when they are constant, they tend to be either 0 or (unsigned)-1 (drivers/isdn/i4l/isdnhdlc.o, drivers/iio/amplifiers/ad8366.c) and give the correct result by pure chance. In fact, the only driver that I could find that gets different results with this is drivers/net/wan/slic_ds26522.c, which in turn is a driver for fairly rare hardware (adding the maintainer to Cc for testing). Link: http://lkml.kernel.org/r/20190322140503.123580-1-arnd@arndb.de Fixes: 556d2f055bf6 ("ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction") Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Cc: Zhao Qiang Cc: Yalin Wang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitrev.h | 46 +++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index 50fb0dee23e86..d35b8ec1c485c 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -34,41 +34,41 @@ static inline u32 __bitrev32(u32 x) #define __constant_bitrev32(x) \ ({ \ - u32 __x = x; \ - __x = (__x >> 16) | (__x << 16); \ - __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = (___x >> 16) | (___x << 16); \ + ___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8); \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev16(x) \ ({ \ - u16 __x = x; \ - __x = (__x >> 8) | (__x << 8); \ - __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ - __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ - __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ - __x; \ + u16 ___x = x; \ + ___x = (___x >> 8) | (___x << 8); \ + ___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4); \ + ___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2); \ + ___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1); \ + ___x; \ }) #define __constant_bitrev8x4(x) \ ({ \ - u32 __x = x; \ - __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ - __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ - __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ - __x; \ + u32 ___x = x; \ + ___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4); \ + ___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2); \ + ___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1); \ + ___x; \ }) #define __constant_bitrev8(x) \ ({ \ - u8 __x = x; \ - __x = (__x >> 4) | (__x << 4); \ - __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ - __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ - __x; \ + u8 ___x = x; \ + ___x = (___x >> 4) | (___x << 4); \ + ___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2); \ + ___x = ((___x & (u8)0xAAU) >> 1) | 
((___x & (u8)0x55U) << 1); \ + ___x; \ }) #define bitrev32(x) \ -- GitLab From b11ed18efa8f3dc58b259b812588317b765b1cfc Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Fri, 5 Apr 2019 18:38:58 -0700 Subject: [PATCH 0695/1861] lib/lzo: fix bugs for very short or empty input For very short input data (0 - 1 bytes), lzo-rle was not behaving correctly. Fix this behaviour and update documentation accordingly. For zero-length input, lzo v0 outputs an end-of-stream marker only, which was misinterpreted by lzo-rle as a bitstream version number. Ensure bitstream versions > 0 require a minimum stream length of 5. Also fixes a bug in handling the tail for very short inputs when a bitstream version is present. Link: http://lkml.kernel.org/r/20190326165857.34613-1-dave.rodgman@arm.com Signed-off-by: Dave Rodgman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/lzo.txt | 8 +++++--- lib/lzo/lzo1x_compress.c | 9 ++++++--- lib/lzo/lzo1x_decompress_safe.c | 4 +--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index f79934225d8d3..ca983328976bc 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -102,9 +102,11 @@ Byte sequences dictionary which is empty, and that it will always be invalid at this place. - 17 : bitstream version. If the first byte is 17, the next byte - gives the bitstream version (version 1 only). If the first byte - is not 17, the bitstream version is 0. + 17 : bitstream version. If the first byte is 17, and compressed + stream length is at least 5 bytes (length of shortest possible + versioned bitstream), the next byte gives the bitstream version + (version 1 only). + Otherwise, the bitstream version is 0. 18..21 : copy 0..3 literals state = (byte - 17) = 0..3 [ copy literals ] diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 4525fb0948442..a8ede77afe0db 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -291,13 +291,14 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, { const unsigned char *ip = in; unsigned char *op = out; + unsigned char *data_start; size_t l = in_len; size_t t = 0; signed char state_offset = -2; unsigned int m4_max_offset; - // LZO v0 will never write 17 as first byte, - // so this is used to version the bitstream + // LZO v0 will never write 17 as first byte (except for zero-length + // input), so this is used to version the bitstream if (bitstream_version > 0) { *op++ = 17; *op++ = bitstream_version; @@ -306,6 +307,8 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, m4_max_offset = M4_MAX_OFFSET_V0; } + data_start = op; + while (l > 20) { size_t ll = l <= (m4_max_offset + 1) ? 
l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; @@ -324,7 +327,7 @@ int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, if (t > 0) { const unsigned char *ii = in + in_len - t; - if (op == out && t <= 238) { + if (op == data_start && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { op[state_offset] |= t; diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 6d2600ea3b554..9e07e9ef1aad7 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -54,11 +54,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, if (unlikely(in_len < 3)) goto input_overrun; - if (likely(*ip == 17)) { + if (likely(in_len >= 5) && likely(*ip == 17)) { bitstream_version = ip[1]; ip += 2; - if (unlikely(in_len < 5)) - goto input_overrun; } else { bitstream_version = 0; } -- GitLab From fcae96ff96538f66e7acd5d4e0f2e7516ff8cbd0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Apr 2019 18:39:01 -0700 Subject: [PATCH 0696/1861] mm: fix vm_fault_t cast in VM_FAULT_GET_HINDEX() Symmetrically to VM_FAULT_SET_HINDEX(), we need a force-cast in VM_FAULT_GET_HINDEX() to tell sparse that this is intentional. Sparse complains about the current code when building a kernel with CONFIG_MEMORY_FAILURE: arch/x86/mm/fault.c:1058:53: warning: restricted vm_fault_t degrades to integer Link: http://lkml.kernel.org/r/20190327204117.35215-1-jannh@google.com Fixes: 3d3539018d2c ("mm: create the new vm_fault_t type") Signed-off-by: Jann Horn Reviewed-by: Andrew Morton Cc: Souptick Joarder Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7eade9132f02e..4ef4bbe78a1da 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -671,7 +671,7 @@ enum vm_fault_reason { /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) -#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf) +#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ -- GitLab From 58b6e5e8f1addd44583d61b0a03c0f5519527e35 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Apr 2019 18:39:06 -0700 Subject: [PATCH 0697/1861] hugetlbfs: fix memory leak for resv_map When mknod is used to create a block special file in hugetlbfs, it will allocate an inode and kmalloc a 'struct resv_map' via resv_map_alloc(). inode->i_mapping->private_data will point to the newly allocated resv_map. However, when the device special file is opened, bd_acquire() will set inode->i_mapping to bd_inode->i_mapping. Thus the pointer to the allocated resv_map is lost and the structure is leaked. Programs to reproduce:

mount -t hugetlbfs nodev hugetlbfs
mknod hugetlbfs/dev b 0 0
exec 30<> hugetlbfs/dev
umount hugetlbfs/

resv_map structures are only needed for inodes which can have associated page allocations. To fix the leak, only allocate resv_map for those inodes which could possibly be associated with page allocations.
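The pointer loss can be modeled in isolation. A toy sketch with simplified stand-in structs (these are not the real kernel types, just enough to show the sequence):

#include <stdlib.h>

struct resv_map { int refs; };
struct address_space { struct resv_map *private_data; };
struct inode { struct address_space *i_mapping; struct address_space i_data; };

int main(void)
{
	struct inode inode = { .i_mapping = &inode.i_data };
	struct address_space bd_mapping = { 0 };

	/* hugetlbfs_get_inode(): stash the freshly allocated resv_map */
	inode.i_mapping->private_data = malloc(sizeof(struct resv_map));

	/* bd_acquire() on open redirects i_mapping; the only pointer
	 * to the resv_map is now unreachable, i.e. leaked */
	inode.i_mapping = &bd_mapping;

	return 0;
}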
Link: http://lkml.kernel.org/r/20190401213101.16476-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Andrew Morton Reported-by: Yufen Yu Suggested-by: Yufen Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ec32fece5e1e9..9285dd4f4b1ce 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -755,11 +755,17 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev) { struct inode *inode; - struct resv_map *resv_map; + struct resv_map *resv_map = NULL; - resv_map = resv_map_alloc(); - if (!resv_map) - return NULL; + /* + * Reserve maps are only needed for inodes that can have associated + * page allocations. + */ + if (S_ISREG(mode) || S_ISLNK(mode)) { + resv_map = resv_map_alloc(); + if (!resv_map) + return NULL; + } inode = new_inode(sb); if (inode) { @@ -794,8 +800,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, break; } lockdep_annotate_inode_mutex_key(inode); - } else - kref_put(&resv_map->refs, resv_map_release); + } else { + if (resv_map) + kref_put(&resv_map->refs, resv_map_release); + } return inode; } -- GitLab From c6f3c5ee40c10bb65725047a220570f718507001 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 5 Apr 2019 18:39:10 -0700 Subject: [PATCH 0698/1861] mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd() With some architectures like ppc64, set_pmd_at() cannot cope with a situation where there is already some (different) valid entry present. Use pmdp_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PMD entries. This is similar to commit cae85cb8add3 ("mm/memory.c: fix modifying of page protection by insert_pfn()") We also do a similar update w.r.t. insert_pfn_pud, even though ppc64 doesn't support pud pfn entries now.
Without this patch we also see the below message in the kernel log "BUG: non-zero pgtables_bytes on freeing mm:" Link: http://lkml.kernel.org/r/20190402115125.18803-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reported-by: Chandan Rajendra Reviewed-by: Jan Kara Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 404acdcd0455d..165ea46bf1492 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pmd_lock(mm, pmd); + if (!pmd_none(*pmd)) { + if (write) { + if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); + goto out_unlock; + } + entry = pmd_mkyoung(*pmd); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + if (pmdp_set_access_flags(vma, addr, pmd, entry, 1)) + update_mmu_cache_pmd(vma, addr, pmd); + } + + goto out_unlock; + } + entry = pmd_mkhuge(pfn_t_pmd(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pmd_mkdevmap(entry); @@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (pgtable) { pgtable_trans_huge_deposit(mm, pmd, pgtable); mm_inc_nr_ptes(mm); + pgtable = NULL; } set_pmd_at(mm, addr, pmd, entry); update_mmu_cache_pmd(vma, addr, pmd); + +out_unlock: spin_unlock(ptl); + if (pgtable) + pte_free(mm, pgtable); } vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, spinlock_t *ptl; ptl = pud_lock(mm, pud); + if (!pud_none(*pud)) { + if (write) { + if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) { + WARN_ON_ONCE(!is_huge_zero_pud(*pud)); + goto out_unlock; + } + entry = pud_mkyoung(*pud); + entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); + if (pudp_set_access_flags(vma, addr, pud, entry, 1)) + update_mmu_cache_pud(vma, addr, pud); + } + goto out_unlock; + } + entry = pud_mkhuge(pfn_t_pud(pfn, prot)); if (pfn_t_devmap(pfn)) entry = pud_mkdevmap(entry); @@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, } set_pud_at(mm, addr, pud, entry); update_mmu_cache_pud(vma, addr, pud); + +out_unlock: spin_unlock(ptl); } -- GitLab From be87ab0afd680ac35486d16c0963c56d9be1d8a0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 5 Apr 2019 18:39:14 -0700 Subject: [PATCH 0699/1861] psi: clarify the units used in pressure files The output of the PSI files shows a bunch of numbers with no unit. The psi.txt documentation file also does not indicate what units are used. One can only find out by looking at the source code. The units are percentage for the averages and microseconds for the total. Make the information easier to find by documenting the units in psi.txt.
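For reference, the pressure files look like this (the numbers below are made up; the avg* fields are the percentages and total is the microsecond counter that this patch documents):

$ cat /proc/pressure/memory
some avg10=0.32 avg60=0.10 avg300=0.02 total=129384
full avg10=0.12 avg60=0.05 avg300=0.01 total=64210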
Link: http://lkml.kernel.org/r/20190402193810.3450-1-longman@redhat.com Signed-off-by: Waiman Long Acked-by: Johannes Weiner Cc: "Peter Zijlstra (Intel)" Cc: Tejun Heo Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/psi.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt index b8ca28b60215a..7e71c9c1d8e9c 100644 --- a/Documentation/accounting/psi.txt +++ b/Documentation/accounting/psi.txt @@ -56,12 +56,12 @@ situation from a state where some tasks are stalled but the CPU is still doing productive work. As such, time spent in this subset of the stall state is tracked separately and exported in the "full" averages. -The ratios are tracked as recent trends over ten, sixty, and three -hundred second windows, which gives insight into short term events as -well as medium and long term trends. The total absolute stall time is -tracked and exported as well, to allow detection of latency spikes -which wouldn't necessarily make a dent in the time averages, or to -average trends over custom time frames. +The ratios (in %) are tracked as recent trends over ten, sixty, and +three hundred second windows, which gives insight into short term events +as well as medium and long term trends. The total absolute stall time +(in us) is tracked and exported as well, to allow detection of latency +spikes which wouldn't necessarily make a dent in the time averages, +or to average trends over custom time frames. Cgroup2 interface ================= -- GitLab From 0b3d6e6f2dd0a7b697b1aa8c167265908940624b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 5 Apr 2019 18:39:18 -0700 Subject: [PATCH 0700/1861] mm: writeback: use exact memcg dirty counts Since commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") memcg dirty and writeback counters are managed as: 1) per-memcg per-cpu values in range of [-32..32] 2) per-memcg atomic counter When a per-cpu counter cannot fit in [-32..32] it's flushed to the atomic. Stat readers only check the atomic. Thus readers such as balance_dirty_pages() may see a nontrivial error margin: 32 pages per cpu. Assuming 100 cpus:

4k x86 page_size: 13 MiB error per memcg
64k ppc page_size: 200 MiB error per memcg

Considering that dirty+writeback are used together for some decisions the errors double. This inaccuracy can lead to undeserved oom kills. One nasty case is when all per-cpu counters hold positive values offsetting an atomic negative value (i.e. per_cpu[*]=32, atomic=n_cpu*-32). balance_dirty_pages() only consults the atomic and does not consider throttling the next n_cpu*32 dirty pages. If the file_lru is in the 13..200 MiB range then there's absolutely no dirty throttling, which burdens vmscan with only dirty+writeback pages thus resorting to oom kill. It could be argued that tiny containers are not supported, but it's more subtle. It's the amount of space available for the file lru that matters. If a container has memory.max - 200MiB of non-reclaimable memory, then it will also suffer such oom kills on a 100 cpu machine. The following test reliably ooms without this patch. This patch avoids oom kills.
$ cat test
mount -t cgroup2 none /dev/cgroup
cd /dev/cgroup
echo +io +memory > cgroup.subtree_control
mkdir test
cd test
echo 10M > memory.max
(echo $BASHPID > cgroup.procs && exec /memcg-writeback-stress /foo)
(echo $BASHPID > cgroup.procs && exec dd if=/dev/zero of=/foo bs=2M count=100)

$ cat memcg-writeback-stress.c
/*
 * Dirty pages from all but one cpu.
 * Clean pages from the non dirtying cpu.
 * This is to stress per cpu counter imbalance.
 * On a 100 cpu machine:
 * - per memcg per cpu dirty count is 32 pages for each of 99 cpus
 * - per memcg atomic is -99*32 pages
 * - thus the complete dirty limit: sum of all counters 0
 * - balance_dirty_pages() only sees atomic count -99*32 pages, which
 *   it max()s to 0.
 * - So a workload can dirty -99*32 pages before balance_dirty_pages()
 *   cares.
 */
#define _GNU_SOURCE
#include <err.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/sysinfo.h>
#include <sys/types.h>
#include <unistd.h>

static char *buf;
static int bufSize;

static void set_affinity(int cpu)
{
	cpu_set_t affinity;

	CPU_ZERO(&affinity);
	CPU_SET(cpu, &affinity);
	if (sched_setaffinity(0, sizeof(affinity), &affinity))
		err(1, "sched_setaffinity");
}

static void dirty_on(int output_fd, int cpu)
{
	int i, wrote;

	set_affinity(cpu);
	for (i = 0; i < 32; i++) {
		for (wrote = 0; wrote < bufSize; ) {
			int ret = write(output_fd, buf+wrote, bufSize-wrote);

			if (ret == -1)
				err(1, "write");
			wrote += ret;
		}
	}
}

int main(int argc, char **argv)
{
	int cpu, flush_cpu = 1, output_fd;
	const char *output;

	if (argc != 2)
		errx(1, "usage: output_file");
	output = argv[1];
	bufSize = getpagesize();
	buf = malloc(getpagesize());
	if (buf == NULL)
		errx(1, "malloc failed");
	output_fd = open(output, O_CREAT|O_RDWR, 0666);
	if (output_fd == -1)
		err(1, "open(%s)", output);

	for (cpu = 0; cpu < get_nprocs(); cpu++) {
		if (cpu != flush_cpu)
			dirty_on(output_fd, cpu);
	}

	set_affinity(flush_cpu);
	if (fsync(output_fd))
		err(1, "fsync(%s)", output);
	if (close(output_fd))
		err(1, "close(%s)", output);
	free(buf);
}

Make balance_dirty_pages() and wb_over_bg_thresh() work harder to collect exact per memcg counters. This avoids the aforementioned oom kills. This does not affect the overhead of memory.stat, which still reads the single atomic counter. Why not use percpu_counter? memcg already handles cpus going offline, so no need for that overhead from percpu_counter. And the percpu_counter spinlocks are more heavyweight than is required. It probably also makes sense to use exact dirty and writeback counters in memcg oom reports. But that is saved for later. Link: http://lkml.kernel.org/r/20190329174609.164344-1-gthelen@google.com Signed-off-by: Greg Thelen Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Tejun Heo Cc: [4.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++++- mm/memcontrol.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f3d880b7ca17..dbb6118370c1e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -566,7 +566,10 @@ struct mem_cgroup *lock_page_memcg(struct page *page); void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); -/* idx can be of type enum memcg_stat_item or node_stat_item */ +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page_state().
+ */ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 532e0e2a4817e..81a0d3914ec99 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) return &memcg->cgwb_domain; } +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page(). + */ +static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx) +{ + long x = atomic_long_read(&memcg->stat[idx]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx]; + if (x < 0) + x = 0; + return x; +} + /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question @@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); + *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ - *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); + *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK); *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | (1 << LRU_ACTIVE_FILE)); *pheadroom = PAGE_COUNTER_MAX; -- GitLab From 166dbd930c99f640fa8a9beead7b9f5f5b016fa0 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Fri, 5 Apr 2019 18:39:22 -0700 Subject: [PATCH 0701/1861] MAINTAINERS: fix bad pattern in ARM/NUVOTON NPCM In the process of upstreaming architecture support for ARM/NUVOTON NPCM include/dt-bindings/clock/nuvoton,npcm7xx-clks.h was renamed include/dt-bindings/clock/nuvoton,npcm7xx-clock.h without updating MAINTAINERS. This updates the MAINTAINERS pattern to match the new name of this file. Link: http://lkml.kernel.org/r/20190328235752.334462-1-tmaimon77@gmail.com Fixes: 6a498e06ba22 ("MAINTAINERS: Add entry for the Nuvoton NPCM architecture") Signed-off-by: Brendan Higgins Signed-off-by: Tomer Maimon Reported-by: Joe Perches Reviewed-by: Benjamin Fair Cc: Avi Fishman Cc: Mukesh Ojha Cc: Nancy Yuen Cc: Patrick Venture Cc: Tali Perry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6771bd784f5f5..63bfdaf13f28c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1900,7 +1900,7 @@ L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-npcm/ F: arch/arm/boot/dts/nuvoton-npcm* -F: include/dt-bindings/clock/nuvoton,npcm7xx-clks.h +F: include/dt-bindings/clock/nuvoton,npcm7xx-clock.h F: drivers/*/*npcm* F: Documentation/devicetree/bindings/*/*npcm* F: Documentation/devicetree/bindings/*/*/*npcm* -- GitLab From 803cfadcb6c5518ebb5a9a398d56b9418ad65585 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Fri, 5 Apr 2019 18:39:26 -0700 Subject: [PATCH 0702/1861] MAINTAINERS: add maintainer and replacing reviewer ARM/NUVOTON NPCM Add Tali Perry as Nuvoton NPCM maintainer, replace Brendan Higgins Nuvoton NPCM reviewer with Benjamin Fair. 
Link: http://lkml.kernel.org/r/20190328235752.334462-2-tmaimon77@gmail.com Signed-off-by: Tomer Maimon Reviewed-by: Brendan Higgins Reviewed-by: Benjamin Fair Reviewed-by: Mukesh Ojha Cc: Joe Perches Cc: Avi Fishman Cc: Patrick Venture Cc: Nancy Yuen Cc: Tali Perry Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 63bfdaf13f28c..2359e12e4c41e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1893,9 +1893,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git ARM/NUVOTON NPCM ARCHITECTURE M: Avi Fishman M: Tomer Maimon +M: Tali Perry R: Patrick Venture R: Nancy Yuen -R: Brendan Higgins +R: Benjamin Fair L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-npcm/ -- GitLab From acaf892ecbf5be7710ae05a61fd43c668f68ad95 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Apr 2019 18:39:30 -0700 Subject: [PATCH 0703/1861] sh: fix multiple function definition build errors Many of the sh CPU-types have their own plat_irq_setup() and arch_init_clk_ops() functions, so these same (empty) functions in arch/sh/boards/of-generic.c are not needed and cause build errors. If there is some case where these empty functions are needed, they can be retained by marking them as "__weak" while at the same time making builds that do not need them succeed. Fixes these build errors: arch/sh/boards/of-generic.o: In function `plat_irq_setup': (.init.text+0x134): multiple definition of `plat_irq_setup' arch/sh/kernel/cpu/sh2/setup-sh7619.o:(.init.text+0x30): first defined here arch/sh/boards/of-generic.o: In function `arch_init_clk_ops': (.init.text+0x118): multiple definition of `arch_init_clk_ops' arch/sh/kernel/cpu/sh2/clock-sh7619.o:(.init.text+0x0): first defined here Link: http://lkml.kernel.org/r/9ee4e0c5-f100-86a2-bd4d-1d3287ceab31@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Takashi Iwai Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/boards/of-generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 958f46da3a791..d91065e81a4e5 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -164,10 +164,10 @@ static struct sh_machine_vector __initmv sh_of_generic_mv = { struct sh_clk_ops; -void __init arch_init_clk_ops(struct sh_clk_ops **ops, int idx) +void __init __weak arch_init_clk_ops(struct sh_clk_ops **ops, int idx) { } -void __init plat_irq_setup(void) +void __init __weak plat_irq_setup(void) { } -- GitLab From e91455217d8c7b128c158432869f6e697283f3ec Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 5 Apr 2019 18:39:34 -0700 Subject: [PATCH 0704/1861] mm/util.c: fix strndup_user() comment The kerneldoc misdescribes strndup_user()'s return value. Cc: Dan Carpenter Cc: Timur Tabi Cc: Mihai Caraman Cc: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/util.c b/mm/util.c index d559bde497a9b..43a2984bccaab 100644 --- a/mm/util.c +++ b/mm/util.c @@ -204,7 +204,7 @@ EXPORT_SYMBOL(vmemdup_user); * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the trailing NUL. 
* - * Return: newly allocated copy of @s or %NULL in case of error + * Return: newly allocated copy of @s or an ERR_PTR() in case of error */ char *strndup_user(const char __user *s, long n) { -- GitLab From 9002b21465fa4d829edfc94a5a441005cffaa972 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Apr 2019 18:39:38 -0700 Subject: [PATCH 0705/1861] kernel/sysctl.c: fix out-of-bounds access when setting file-max Commit 32a5ad9c2285 ("sysctl: handle overflow for file-max") hooked up min/max values for the file-max sysctl parameter via the .extra1 and .extra2 fields in the corresponding struct ctl_table entry. Unfortunately, the minimum value points at the global 'zero' variable, which is an int. This results in a KASAN splat when accessed as a long by proc_doulongvec_minmax on 64-bit architectures:

| BUG: KASAN: global-out-of-bounds in __do_proc_doulongvec_minmax+0x5d8/0x6a0
| Read of size 8 at addr ffff2000133d1c20 by task systemd/1
|
| CPU: 0 PID: 1 Comm: systemd Not tainted 5.1.0-rc3-00012-g40b114779944 #2
| Hardware name: linux,dummy-virt (DT)
| Call trace:
|  dump_backtrace+0x0/0x228
|  show_stack+0x14/0x20
|  dump_stack+0xe8/0x124
|  print_address_description+0x60/0x258
|  kasan_report+0x140/0x1a0
|  __asan_report_load8_noabort+0x18/0x20
|  __do_proc_doulongvec_minmax+0x5d8/0x6a0
|  proc_doulongvec_minmax+0x4c/0x78
|  proc_sys_call_handler.isra.19+0x144/0x1d8
|  proc_sys_write+0x34/0x58
|  __vfs_write+0x54/0xe8
|  vfs_write+0x124/0x3c0
|  ksys_write+0xbc/0x168
|  __arm64_sys_write+0x68/0x98
|  el0_svc_common+0x100/0x258
|  el0_svc_handler+0x48/0xc0
|  el0_svc+0x8/0xc
|
| The buggy address belongs to the variable:
|  zero+0x0/0x40
|
| Memory state around the buggy address:
|  ffff2000133d1b00: 00 00 00 00 00 00 00 00 fa fa fa fa 04 fa fa fa
|  ffff2000133d1b80: fa fa fa fa 04 fa fa fa fa fa fa fa 04 fa fa fa
| >ffff2000133d1c00: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00
|                                ^
|  ffff2000133d1c80: fa fa fa fa 00 fa fa fa fa fa fa fa 00 00 00 00
|  ffff2000133d1d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Fix the splat by introducing an unsigned long 'zero_ul' and using that instead.
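The mismatch is easy to see in a userspace model of what the ulong handler does with .extra1 (names below are made up; this is an illustrative sketch, not the kernel code):

#include <stdio.h>

static int zero;              /* 4 bytes, like the old minimum */
static unsigned long zero_ul; /* native long, like the fix */

int main(void)
{
	void *extra1 = &zero;

	/* proc_doulongvec_minmax() dereferences extra1 as a long: on
	 * 64-bit this reads 8 bytes from a 4-byte object, which is the
	 * out-of-bounds access KASAN reports */
	unsigned long min = *(unsigned long *)extra1;
	printf("min=%lu\n", min);

	/* with the fix, the pointed-to object really is a long */
	extra1 = &zero_ul;
	printf("min=%lu\n", *(unsigned long *)extra1);
	return 0;
}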
Link: http://lkml.kernel.org/r/20190403153409.17307-1-will.deacon@arm.com Fixes: 32a5ad9c2285 ("sysctl: handle overflow for file-max") Signed-off-by: Will Deacon Acked-by: Christian Brauner Cc: Kees Cook Cc: Alexey Dobriyan Cc: Matteo Croce Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e5da394d1ca36..c9ec050bcf461 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -128,6 +128,7 @@ static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; +static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; @@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &zero_ul, .extra2 = &long_max, }, { -- GitLab From 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 2 Apr 2019 13:28:13 +0200 Subject: [PATCH 0706/1861] x86/asm: Use stricter assembly constraints in bitops There's a number of problems with how arch/x86/include/asm/bitops.h is currently using assembly constraints for the memory region bitops are modifying: 1) Use memory clobber in bitops that touch arbitrary memory Certain bit operations that read/write bits take a base pointer and an arbitrarily large offset to address the bit relative to that base. Inline assembly constraints aren't expressive enough to tell the compiler that the assembly directive is going to touch a specific memory location of unknown size, therefore we have to use the "memory" clobber to indicate that the assembly is going to access memory locations other than those listed in the inputs/outputs. To indicate that BTR/BTS instructions don't necessarily touch the first sizeof(long) bytes of the argument, we also move the address to assembly inputs. This particular change leads to a size increase of 124 kernel functions in a defconfig build. For some of them the diff is in NOP operations, others end up re-reading values from memory and may potentially slow down the execution. But without these clobbers the compiler is free to cache the contents of the bitmaps and use them as if they weren't changed by the inline assembly. 2) Use byte-sized arguments for operations touching single bytes. Passing a long value to ANDB/ORB/XORB instructions makes the compiler treat sizeof(long) bytes as being clobbered, which isn't the case. This may theoretically lead to worse code in the case of heavy optimization. Practical impact: I've built a defconfig kernel and looked through some of the functions generated by GCC 7.3.0 with and without this clobber, and didn't spot any miscompilations. However there is a (trivial) theoretical case where this code leads to miscompilation: https://lkml.org/lkml/2019/3/28/393 using just GCC 8.3.0 with -O2. It isn't hard to imagine someone writes such a function in the kernel someday. So the primary motivation is to fix an existing misuse of the asm directive, which happens to work in certain configurations now, but isn't guaranteed to work under different circumstances. [ --mingo: Added -stable tag because defconfig only builds a fraction of the kernel and the trivial testcase looks normal enough to be used in existing or in-development code.
] Signed-off-by: Alexander Potapenko Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: James Y Knight Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190402112813.193378-1-glider@google.com [ Edited the changelog, tidied up one of the defines. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 41 +++++++++++++++-------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index d153d570bb047..8e790ec219a5f 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -36,16 +36,17 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). */ -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +#define RLONG_ADDR(x) "m" (*(volatile long *) (x)) +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x)) -#define ADDR BITOP_ADDR(addr) +#define ADDR RLONG_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) /** @@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" - : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr) */ static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } /** @@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) @@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile bool negative; asm volatile(LOCK_PREFIX "andb %2,%1" CC_SET(s) - : CC_OUT(s) (negative), ADDR + : CC_OUT(s) (negative), WBYTE_ADDR(addr) : "ir" ((char) ~(1 << nr)) : "memory"); return negative; } @@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile * __clear_bit() is non-atomic and implies release semantics before the memory * operation. It can be used for an unlock if no other CPUs can concurrently * modify other bits in the word. - * - * No memory barrier is required here, because x86 cannot reorder stores past - * older loads. Same principle as spin_unlock. 
*/ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) { - barrier(); __clear_bit(nr, addr); } @@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long * */ static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { - asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } /** @@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" - : BITOP_ADDR(addr) - : "Ir" (nr)); + : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); } } @@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr)); + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) - : CC_OUT(c) (oldbit), ADDR - : "Ir" (nr) : "memory"); + : CC_OUT(c) (oldbit) + : ADDR, "Ir" (nr) : "memory"); return oldbit; } @@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) - : "m" (*(unsigned long *)addr), "Ir" (nr)); + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } -- GitLab From 3ace6891ce8bb9e1267358cb58f93b4fd8b72b69 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 1 Apr 2019 13:14:37 +0300 Subject: [PATCH 0707/1861] i2c: imx: don't leak the i2c adapter on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to free the i2c adapter on the error exit path. Signed-off-by: Laurentiu Tudor Reviewed-by: Mukesh Ojha Reviewed-by: Uwe Kleine-König Fixes: e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()") Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 42fed40198a0f..c0c3043b5d611 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1169,11 +1169,13 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Init DMA config if supported */ ret = i2c_imx_dma_request(i2c_imx, phy_addr); if (ret < 0) - goto clk_notifier_unregister; + goto del_adapter; dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n"); return 0; /* Return OK */ +del_adapter: + i2c_del_adapter(&i2c_imx->adapter); clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); rpm_disable: -- GitLab From 7ff684a683d777c4956fce93e60accbab2bd7696 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 5 Apr 2019 17:42:45 -0400 Subject: [PATCH 0708/1861] null_blk: prevent crash from bad home_node value At module load, if the selected home_node value is greater than the available numa nodes, the system will crash in __alloc_pages_nodemask() due to a bad paging request. 
Prevent this user error crash by detecting the bad value, logging an error, and setting g_home_node back to the default of NUMA_NO_NODE. Signed-off-by: John Pittman Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 417a9f15c1163..d7ac09c092f2a 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1748,6 +1748,11 @@ static int __init null_init(void) return -EINVAL; } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { + pr_err("null_blk: invalid home_node value\n"); + g_home_node = NUMA_NO_NODE; + } + if (g_queue_mode == NULL_Q_RQ) { pr_err("null_blk: legacy IO path no longer available\n"); return -EINVAL; -- GitLab From 47b16820c490149c2923e8474048f2c6e7557cab Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 19 Feb 2019 08:49:56 -0800 Subject: [PATCH 0709/1861] xsysace: Fix error handling in ace_setup If xace hardware reports a bad version number, the error handling code in ace_setup() calls put_disk(), followed by queue cleanup. However, since the disk data structure has the queue pointer set, put_disk() also cleans and releases the queue. This results in blk_cleanup_queue() accessing an already released data structure, which in turn may result in a crash such as the following. [ 10.681671] BUG: Kernel NULL pointer dereference at 0x00000040 [ 10.681826] Faulting instruction address: 0xc0431480 [ 10.682072] Oops: Kernel access of bad area, sig: 11 [#1] [ 10.682251] BE PAGE_SIZE=4K PREEMPT Xilinx Virtex440 [ 10.682387] Modules linked in: [ 10.682528] CPU: 0 PID: 1 Comm: swapper Tainted: G W 5.0.0-rc6-next-20190218+ #2 [ 10.682733] NIP: c0431480 LR: c043147c CTR: c0422ad8 [ 10.682863] REGS: cf82fbe0 TRAP: 0300 Tainted: G W (5.0.0-rc6-next-20190218+) [ 10.683065] MSR: 00029000 CR: 22000222 XER: 00000000 [ 10.683236] DEAR: 00000040 ESR: 00000000 [ 10.683236] GPR00: c043147c cf82fc90 cf82ccc0 00000000 00000000 00000000 00000002 00000000 [ 10.683236] GPR08: 00000000 00000000 c04310bc 00000000 22000222 00000000 c0002c54 00000000 [ 10.683236] GPR16: 00000000 00000001 c09aa39c c09021b0 c09021dc 00000007 c0a68c08 00000000 [ 10.683236] GPR24: 00000001 ced6d400 ced6dcf0 c0815d9c 00000000 00000000 00000000 cedf0800 [ 10.684331] NIP [c0431480] blk_mq_run_hw_queue+0x28/0x114 [ 10.684473] LR [c043147c] blk_mq_run_hw_queue+0x24/0x114 [ 10.684602] Call Trace: [ 10.684671] [cf82fc90] [c043147c] blk_mq_run_hw_queue+0x24/0x114 (unreliable) [ 10.684854] [cf82fcc0] [c04315bc] blk_mq_run_hw_queues+0x50/0x7c [ 10.685002] [cf82fce0] [c0422b24] blk_set_queue_dying+0x30/0x68 [ 10.685154] [cf82fcf0] [c0423ec0] blk_cleanup_queue+0x34/0x14c [ 10.685306] [cf82fd10] [c054d73c] ace_probe+0x3dc/0x508 [ 10.685445] [cf82fd50] [c052d740] platform_drv_probe+0x4c/0xb8 [ 10.685592] [cf82fd70] [c052abb0] really_probe+0x20c/0x32c [ 10.685728] [cf82fda0] [c052ae58] driver_probe_device+0x68/0x464 [ 10.685877] [cf82fdc0] [c052b500] device_driver_attach+0xb4/0xe4 [ 10.686024] [cf82fde0] [c052b5dc] __driver_attach+0xac/0xfc [ 10.686161] [cf82fe00] [c0528428] bus_for_each_dev+0x80/0xc0 [ 10.686314] [cf82fe30] [c0529b3c] bus_add_driver+0x144/0x234 [ 10.686457] [cf82fe50] [c052c46c] driver_register+0x88/0x15c [ 10.686610] [cf82fe60] [c09de288] ace_init+0x4c/0xac [ 10.686742] [cf82fe80] [c0002730] do_one_initcall+0xac/0x330 [ 10.686888] [cf82fee0] [c09aafd0] kernel_init_freeable+0x34c/0x478 [ 10.687043] [cf82ff30] [c0002c6c] kernel_init+0x18/0x114 [ 
10.687188] [cf82ff40] [c000f2f0] ret_from_kernel_thread+0x14/0x1c [ 10.687349] Instruction dump: [ 10.687435] 3863ffd4 4bfffd70 9421ffd0 7c0802a6 93c10028 7c9e2378 93e1002c 38810008 [ 10.687637] 7c7f1b78 90010034 4bfffc25 813f008c <81290040> 75290100 4182002c 80810008 [ 10.688056] ---[ end trace 13c9ff51d41b9d40 ]--- Fix the problem by setting the disk queue pointer to NULL before calling put_disk(). A more comprehensive fix might be to rearrange the code to check the hardware version before initializing data structures, but I don't know if this would have undesirable side effects, and it would increase the complexity of backporting the fix to older kernels. Fixes: 74489a91dd43a ("Add support for Xilinx SystemACE CompactFlash interface") Acked-by: Michal Simek Signed-off-by: Guenter Roeck Signed-off-by: Jens Axboe --- drivers/block/xsysace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 87ccef4bd69e9..32a21b8d1d85f 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1090,6 +1090,8 @@ static int ace_setup(struct ace_device *ace) return 0; err_read: + /* prevent double queue cleanup */ + ace->gd->queue = NULL; put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); -- GitLab From 10dce8af34226d90fa56746a934f8da5dcdba3df Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 26 Mar 2019 22:20:43 +0000 Subject: [PATCH 0710/1861] fs: stream_open - opener for stream-like files so that read and write can run simultaneously without deadlock Commit 9c225f2655e3 ("vfs: atomic f_pos accesses as per POSIX") added locking for file.f_pos access and in particular made concurrent read and write not possible - now both those functions take the f_pos lock for the whole run, and so if e.g. a read is blocked waiting for data, write will deadlock waiting for that read to complete. This caused a regression for stream-like files where previously read and write could run simultaneously, but after that patch could not do so anymore. See e.g. commit 581d21a2d02a ("xenbus: fix deadlock on writes to /proc/xen/xenbus") which fixes such a regression for the particular case of /proc/xen/xenbus. The patch that added f_pos lock in 2014 did so to guarantee POSIX thread safety for read/write/lseek and added the locking to file descriptors of all regular files. In 2014 that thread-safety problem was not new as it was already discussed earlier in 2006. However even though the 2006 version of Linus's patch was adding f_pos locking "only for files that are marked seekable with FMODE_LSEEK (thus avoiding the stream-like objects like pipes and sockets)", the 2014 version - the one that actually made it into the tree as 9c225f2655e3 - is doing so regardless of whether a file is seekable or not. See https://lore.kernel.org/lkml/53022DB1.4070805@gmail.com/ https://lwn.net/Articles/180387 https://lwn.net/Articles/180396 for historic context. The reason that it did so is, probably, that there are many files that are marked non-seekable, but e.g. their read implementation actually depends on knowing the current position to correctly handle the read. Some examples:

	kernel/power/user.c		snapshot_read
	fs/debugfs/file.c		u32_array_read
	fs/fuse/control.c		fuse_conn_waiting_read + ...
	drivers/hwmon/asus_atk0110.c	atk_debugfs_ggrp_read
	arch/s390/hypfs/inode.c		hypfs_read_iter
	...

Despite that, many nonseekable_open users implement read and write with pure stream semantics - they don't depend on passed ppos at all.
And for those cases where read could wait for something inside, it creates a situation similar to xenbus - the write could be never made to go until read is done, and read is waiting for some, potentially external, event, for potentially unbounded time -> deadlock. Besides xenbus, there are 14 such places in the kernel that I've found with semantic patch (see below): drivers/xen/evtchn.c:667:8-24: ERROR: evtchn_fops: .read() can deadlock .write() drivers/isdn/capi/capi.c:963:8-24: ERROR: capi_fops: .read() can deadlock .write() drivers/input/evdev.c:527:1-17: ERROR: evdev_fops: .read() can deadlock .write() drivers/char/pcmcia/cm4000_cs.c:1685:7-23: ERROR: cm4000_fops: .read() can deadlock .write() net/rfkill/core.c:1146:8-24: ERROR: rfkill_fops: .read() can deadlock .write() drivers/s390/char/fs3270.c:488:1-17: ERROR: fs3270_fops: .read() can deadlock .write() drivers/usb/misc/ldusb.c:310:1-17: ERROR: ld_usb_fops: .read() can deadlock .write() drivers/hid/uhid.c:635:1-17: ERROR: uhid_fops: .read() can deadlock .write() net/batman-adv/icmp_socket.c:80:1-17: ERROR: batadv_fops: .read() can deadlock .write() drivers/media/rc/lirc_dev.c:198:1-17: ERROR: lirc_fops: .read() can deadlock .write() drivers/leds/uleds.c:77:1-17: ERROR: uleds_fops: .read() can deadlock .write() drivers/input/misc/uinput.c:400:1-17: ERROR: uinput_fops: .read() can deadlock .write() drivers/infiniband/core/user_mad.c:985:7-23: ERROR: umad_fops: .read() can deadlock .write() drivers/gnss/core.c:45:1-17: ERROR: gnss_fops: .read() can deadlock .write() In addition to the cases above another regression caused by f_pos locking is that now FUSE filesystems that implement open with FOPEN_NONSEEKABLE flag, can no longer implement bidirectional stream-like files - for the same reason as above e.g. read can deadlock write locking on file.f_pos in the kernel. FUSE's FOPEN_NONSEEKABLE was added in 2008 in a7c1b990f715 ("fuse: implement nonseekable open") to support OSSPD. OSSPD implements /dev/dsp in userspace with FOPEN_NONSEEKABLE flag, with corresponding read and write routines not depending on current position at all, and with both read and write being potentially blocking operations: See https://github.com/libfuse/osspd https://lwn.net/Articles/308445 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1406 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1438-L1477 https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1479-L1510 Corresponding libfuse example/test also describes FOPEN_NONSEEKABLE as "somewhat pipe-like files ..." with read handler not using offset. However that test implements only read without write and cannot exercise the deadlock scenario: https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L124-L131 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L146-L163 https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L209-L216 I've actually hit the read vs write deadlock for real while implementing my FUSE filesystem where there is /head/watch file, for which open creates separate bidirectional socket-like stream in between filesystem and its user with both read and write being later performed simultaneously. And there it is semantically not easy to split the stream into two separate read-only and write-only channels: https://lab.nexedi.com/kirr/wendelin.core/blob/f13aa600/wcfs/wcfs.go#L88-169 Let's fix this regression. The plan is: 1. 
We can't change nonseekable_open to include &~FMODE_ATOMIC_POS - doing so would break many in-kernel nonseekable_open users which actually use ppos in read/write handlers. 2. Add stream_open() to kernel to open stream-like non-seekable file descriptors. Read and write on such file descriptors would never use nor change ppos. And with that property on stream-like files read and write will be running without taking f_pos lock - i.e. read and write could be running simultaneously. 3. With semantic patch search and convert to stream_open all in-kernel nonseekable_open users for which read and write actually do not depend on ppos and where there is no other methods in file_operations which assume @offset access. 4. Add FOPEN_STREAM to fs/fuse/ and open in-kernel file-descriptors via steam_open if that bit is present in filesystem open reply. It was tempting to change fs/fuse/ open handler to use stream_open instead of nonseekable_open on just FOPEN_NONSEEKABLE flags, but grepping through Debian codesearch shows users of FOPEN_NONSEEKABLE, and in particular GVFS which actually uses offset in its read and write handlers https://codesearch.debian.net/search?q=-%3Enonseekable+%3D https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1080 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1247-1346 https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1399-1481 so if we would do such a change it will break a real user. 5. Add stream_open and FOPEN_STREAM handling to stable kernels starting from v3.14+ (the kernel where 9c225f2655 first appeared). This will allow to patch OSSPD and other FUSE filesystems that provide stream-like files to return FOPEN_STREAM | FOPEN_NONSEEKABLE in their open handler and this way avoid the deadlock on all kernel versions. This should work because fs/fuse/ ignores unknown open flags returned from a filesystem and so passing FOPEN_STREAM to a kernel that is not aware of this flag cannot hurt. In turn the kernel that is not aware of FOPEN_STREAM will be < v3.14 where just FOPEN_NONSEEKABLE is sufficient to implement streams without read vs write deadlock. This patch adds stream_open, converts /proc/xen/xenbus to it and adds semantic patch to automatically locate in-kernel places that are either required to be converted due to read vs write deadlock, or that are just safe to be converted because read and write do not use ppos and there are no other funky methods in file_operations. Regarding semantic patch I've verified each generated change manually - that it is correct to convert - and each other nonseekable_open instance left - that it is either not correct to convert there, or that it is not converted due to current stream_open.cocci limitations. The script also does not convert files that should be valid to convert, but that currently have .llseek = noop_llseek or generic_file_llseek for unknown reason despite file being opened with nonseekable_open (e.g. 
drivers/input/mousedev.c) Cc: Michael Kerrisk Cc: Yongzhi Pan Cc: Jonathan Corbet Cc: David Vrabel Cc: Juergen Gross Cc: Miklos Szeredi Cc: Tejun Heo Cc: Kirill Tkhai Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Nikolaus Rath Cc: Han-Wen Nienhuys Signed-off-by: Kirill Smelkov Signed-off-by: Linus Torvalds --- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +- fs/open.c | 18 ++ fs/read_write.c | 5 +- include/linux/fs.h | 4 + scripts/coccinelle/api/stream_open.cocci | 363 +++++++++++++++++++++++ 5 files changed, 389 insertions(+), 5 deletions(-) create mode 100644 scripts/coccinelle/api/stream_open.cocci diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index c3e201025ef01..0782ff3c22735 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp) if (xen_store_evtchn == 0) return -ENOENT; - nonseekable_open(inode, filp); - - filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */ + stream_open(inode, filp); u = kzalloc(sizeof(*u), GFP_KERNEL); if (u == NULL) diff --git a/fs/open.c b/fs/open.c index f1c2f855fd43c..a00350018a479 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL(nonseekable_open); + +/* + * stream_open is used by subsystems that want stream-like file descriptors. + * Such file descriptors are not seekable and don't have notion of position + * (file.f_pos is always 0). Contrary to file descriptors of other regular + * files, .read() and .write() can run simultaneously. + * + * stream_open never fails and is marked to return int so that it could be + * directly used as file_operations.open . + */ +int stream_open(struct inode *inode, struct file *filp) +{ + filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS); + filp->f_mode |= FMODE_STREAM; + return 0; +} + +EXPORT_SYMBOL(stream_open); diff --git a/fs/read_write.c b/fs/read_write.c index 177ccc3d405a3..61b43ad7608e3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ static inline loff_t file_pos_read(struct file *file) { - return file->f_pos; + return file->f_mode & FMODE_STREAM ? 
0 : file->f_pos; } static inline void file_pos_write(struct file *file, loff_t pos) { - file->f_pos = pos; + if ((file->f_mode & FMODE_STREAM) == 0) + file->f_pos = pos; } ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b42df09b04c9..dd28e76790891 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -158,6 +158,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_OPENED ((__force fmode_t)0x80000) #define FMODE_CREATED ((__force fmode_t)0x100000) +/* File is stream-like */ +#define FMODE_STREAM ((__force fmode_t)0x200000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) @@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); +extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci new file mode 100644 index 0000000000000..350145da76691 --- /dev/null +++ b/scripts/coccinelle/api/stream_open.cocci @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0 +// Author: Kirill Smelkov (kirr@nexedi.com) +// +// Search for stream-like files that are using nonseekable_open and convert +// them to stream_open. A stream-like file is a file that does not use ppos in +// its read and write. Rationale for the conversion is to avoid deadlock in +// between read and write. + +virtual report +virtual patch +virtual explain // explain decisions in the patch (SPFLAGS="-D explain") + +// stream-like reader & writer - ones that do not depend on f_pos. +@ stream_reader @ +identifier readstream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + +@ stream_writer @ +identifier writestream, ppos; +identifier f, buf, len; +type loff_t; +@@ + ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos) + { + ... when != ppos + } + + +// a function that blocks +@ blocks @ +identifier block_f; +identifier wait_event =~ "^wait_event_.*"; +@@ + block_f(...) { + ... when exists + wait_event(...) + ... when exists + } + +// stream_reader that can block inside. +// +// XXX wait_* can be called not directly from current function (e.g. func -> f -> g -> wait()) +// XXX currently reader_blocks supports only direct and 1-level indirect cases. +@ reader_blocks_direct @ +identifier stream_reader.readstream; +identifier wait_event =~ "^wait_event_.*"; +@@ + readstream(...) + { + ... when exists + wait_event(...) + ... when exists + } + +@ reader_blocks_1 @ +identifier stream_reader.readstream; +identifier blocks.block_f; +@@ + readstream(...) + { + ... when exists + block_f(...) + ... when exists + } + +@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @ +identifier stream_reader.readstream; +@@ + readstream(...) { + ... + } + + +// file_operations + whether they have _any_ .read, .write, .llseek ... at all. +// +// XXX add support for file_operations xxx[N] = ... (sound/core/pcm_native.c) +@ fops0 @ +identifier fops; +@@ + struct file_operations fops = { + ... 
+ }; + +@ has_read @ +identifier fops0.fops; +identifier read_f; +@@ + struct file_operations fops = { + .read = read_f, + }; + +@ has_read_iter @ +identifier fops0.fops; +identifier read_iter_f; +@@ + struct file_operations fops = { + .read_iter = read_iter_f, + }; + +@ has_write @ +identifier fops0.fops; +identifier write_f; +@@ + struct file_operations fops = { + .write = write_f, + }; + +@ has_write_iter @ +identifier fops0.fops; +identifier write_iter_f; +@@ + struct file_operations fops = { + .write_iter = write_iter_f, + }; + +@ has_llseek @ +identifier fops0.fops; +identifier llseek_f; +@@ + struct file_operations fops = { + .llseek = llseek_f, + }; + +@ has_no_llseek @ +identifier fops0.fops; +@@ + struct file_operations fops = { + .llseek = no_llseek, + }; + +@ has_mmap @ +identifier fops0.fops; +identifier mmap_f; +@@ + struct file_operations fops = { + .mmap = mmap_f, + }; + +@ has_copy_file_range @ +identifier fops0.fops; +identifier copy_file_range_f; +@@ + struct file_operations fops = { + .copy_file_range = copy_file_range_f, + }; + +@ has_remap_file_range @ +identifier fops0.fops; +identifier remap_file_range_f; +@@ + struct file_operations fops = { + .remap_file_range = remap_file_range_f, + }; + +@ has_splice_read @ +identifier fops0.fops; +identifier splice_read_f; +@@ + struct file_operations fops = { + .splice_read = splice_read_f, + }; + +@ has_splice_write @ +identifier fops0.fops; +identifier splice_write_f; +@@ + struct file_operations fops = { + .splice_write = splice_write_f, + }; + + +// file_operations that is candidate for stream_open conversion - it does not +// use mmap and other methods that assume @offset access to file. +// +// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now. +// XXX maybe_steam.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops". +@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @ +identifier fops0.fops; +@@ + struct file_operations fops = { + }; + + +// ---- conversions ---- + +// XXX .open = nonseekable_open -> .open = stream_open +// XXX .open = func -> openfunc -> nonseekable_open + +// read & write +// +// if both are used in the same file_operations together with an opener - +// under that conditions we can use stream_open instead of nonseekable_open. +@ fops_rw depends on maybe_stream @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + .write = writestream, + }; + +@ report_rw depends on report @ +identifier fops_rw.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report && reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,)) + +@ script:python depends on report && !reader_blocks @ +fops << fops0.fops; +p << report_rw.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + + +@ explain_rw_deadlocked depends on explain && reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... 
+- nonseekable_open ++ nonseekable_open /* read & write (was deadlock) */ + ...> + } + + +@ explain_rw_nodeadlock depends on explain && !reader_blocks @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read & write (no direct deadlock) */ + ...> + } + +@ patch_rw depends on patch @ +identifier fops_rw.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// read, but not write +@ fops_r depends on maybe_stream && !has_write @ +identifier fops0.fops, openfunc; +identifier stream_reader.readstream; +@@ + struct file_operations fops = { + .open = openfunc, + .read = readstream, + }; + +@ report_r depends on report @ +identifier fops_r.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_r.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_r depends on explain @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* read only */ + ...> + } + +@ patch_r depends on patch @ +identifier fops_r.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// write, but not read +@ fops_w depends on maybe_stream && !has_read @ +identifier fops0.fops, openfunc; +identifier stream_writer.writestream; +@@ + struct file_operations fops = { + .open = openfunc, + .write = writestream, + }; + +@ report_w depends on report @ +identifier fops_w.openfunc; +position p1; +@@ + openfunc(...) { + <... + nonseekable_open@p1 + ...> + } + +@ script:python depends on report @ +fops << fops0.fops; +p << report_w.p1; +@@ +coccilib.report.print_report(p[0], + "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,)) + +@ explain_w depends on explain @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ nonseekable_open /* write only */ + ...> + } + +@ patch_w depends on patch @ +identifier fops_w.openfunc; +@@ + openfunc(...) { + <... +- nonseekable_open ++ stream_open + ...> + } + + +// no read, no write - don't change anything -- GitLab From c2f8d7cb32cd95e3005bed58ce02afa686b9f357 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 18 Mar 2019 22:56:15 +0100 Subject: [PATCH 0711/1861] Revert: parisc: Use F_EXTEND() macro in iosapic code Revert parts of commit 97d7e2e3fd8a ("parisc: Use F_EXTEND() macro in iosapic code"). It breaks booting the 32-bit kernel on some machines. Reported-by: Sven Schnelle Tested-by: Sven Schnelle Fixes: 97d7e2e3fd8a ("parisc: Use F_EXTEND() macro in iosapic code") Signed-off-by: Helge Deller --- drivers/parisc/iosapic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 1be571c20062c..6bad04cbb1d37 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -157,8 +157,12 @@ #define DBG_IRT(x...) 
#endif +#ifdef CONFIG_64BIT +#define COMPARE_IRTE_ADDR(irte, hpa) ((irte)->dest_iosapic_addr == (hpa)) +#else #define COMPARE_IRTE_ADDR(irte, hpa) \ - ((irte)->dest_iosapic_addr == F_EXTEND(hpa)) + ((irte)->dest_iosapic_addr == ((hpa) | 0xffffffff00000000ULL)) +#endif #define IOSAPIC_REG_SELECT 0x00 #define IOSAPIC_REG_WINDOW 0x10 -- GitLab From 45efd871bf0a47648f119d1b41467f70484de5bc Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 4 Apr 2019 18:16:03 +0200 Subject: [PATCH 0712/1861] parisc: regs_return_value() should return gpr28 While working on kretprobes for PA-RISC I was wondering why the kprobes sanity test always fails on kretprobes. This is caused by returning gpr20 instead of gpr28. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.14+ --- arch/parisc/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 2a27b275ab092..4a87b3d600c61 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -22,7 +22,7 @@ unsigned long profile_pc(struct pt_regs *); static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->gr[20]; + return regs->gr[28]; } static inline void instruction_pointer_set(struct pt_regs *regs, -- GitLab From f324fa58327791b2696628b31480e7e21c745706 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 4 Apr 2019 18:16:04 +0200 Subject: [PATCH 0713/1861] parisc: also set iaoq_b in instruction_pointer_set() When setting the instruction pointer on PA-RISC we also need to set the back of the instruction queue to the new offset, otherwise we will execute one instruction from the new location and then jump back to the old location stored in iaoq_b. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Fixes: 75ebedf1d263 ("parisc: Add HAVE_REGS_AND_STACK_ACCESS_API feature") Cc: stable@vger.kernel.org # 4.19+ --- arch/parisc/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 4a87b3d600c61..9ff033d261ab3 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -28,7 +28,8 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { - regs->iaoq[0] = val; + regs->iaoq[0] = val; + regs->iaoq[1] = val + 4; } /* Query offset/name of register from its name/offset */ -- GitLab From d006e95b5561f708d0385e9677ffe2c46f2ae345 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Apr 2019 12:13:27 +0200 Subject: [PATCH 0714/1861] parisc: Detect QEMU earlier in boot process While adding LASI support to QEMU, I noticed that the QEMU detection in the kernel happens much too late. For example, when a LASI chip is found by the kernel, it registers the LASI LED driver as well. But when we run on QEMU it makes sense to avoid spending unnecessary CPU cycles, so we need to access the running_on_QEMU flag earlier than before. This patch now makes the QEMU detection the first task of the Linux kernel by moving it to where the kernel enters the C code.
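For reference, the detection itself is just a fixed 8-byte compare against the firmware-initialized page zero, which is why it can run this early. A minimal userspace sketch of the test follows; the struct below stands in for the real PAGE0 layout and is an assumption for illustration only:

#include <stdio.h>
#include <string.h>

struct zeropage { char pad0[8]; };      /* stand-in for the firmware PAGE0 */

static int detect_qemu(const struct zeropage *p0)
{
        /* SeaBIOS, the firmware QEMU uses on parisc, leaves this marker */
        return memcmp(p0->pad0, "SeaBIOS", 8) == 0;
}

int main(void)
{
        struct zeropage p0 = { "SeaBIOS" };

        printf("running_on_qemu = %d\n", detect_qemu(&p0));
        return 0;
}

Because the compare needs nothing but the page-zero mapping, running it first thing in start_parisc() is safe.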
Fixes: 310d82784fb4 ("parisc: qemu idle sleep support") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v4.14+ --- arch/parisc/kernel/process.c | 6 ------ arch/parisc/kernel/setup.c | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index eb39e7e380d7e..841db71958cdb 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -210,12 +210,6 @@ void __cpuidle arch_cpu_idle(void) static int __init parisc_idle_init(void) { - const char *marker; - - /* check QEMU/SeaBIOS marker in PAGE0 */ - marker = (char *) &PAGE0->pad0; - running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); - if (!running_on_qemu) cpu_idle_poll_ctrl(1); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 15dd9e21be7ea..d908058d05c10 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -397,6 +397,9 @@ void __init start_parisc(void) int ret, cpunum; struct pdc_coproc_cfg coproc_cfg; + /* check QEMU/SeaBIOS marker in PAGE0 */ + running_on_qemu = (memcmp(&PAGE0->pad0, "SeaBIOS", 8) == 0); + cpunum = smp_processor_id(); init_cpu_topology(); -- GitLab From d7ee81ad09f072eab1681877fc71ec05f9c1ae92 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:12:48 +0300 Subject: [PATCH 0715/1861] NFC: nci: Add some bounds checking in nci_hci_cmd_received() This is similar to commit 674d9de02aa7 ("NFC: Fix possible memory corruption when handling SHDLC I-Frame commands"). I'm not totally sure, but I think that commit description may have overstated the danger. I was under the impression that this data came from the firmware? If you can't trust your networking firmware, then you're already in trouble. Anyway, these days we add bounds checking wherever we can and we call it kernel hardening. Better safe than sorry. Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/nfc/nci/hci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b4..c0d323b58e732 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; -- GitLab From 6491d698396fd5da4941980a35ca7c162a672016 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Apr 2019 10:13:51 +0300 Subject: [PATCH 0716/1861] nfc: nci: Potential off by one in ->pipes[] array This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes") where we changed NFC_HCI_MAX_PIPES from 127 to 128. As the comment next to the define explains, the pipe identifier is 7 bits long. The highest possible pipe is 127, but the number of possible pipes is 128.
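Taken together with the bounds checks added above, the invariant both nci patches establish is that a 7-bit pipe id indexes a 128-entry table and is rejected whenever it is >= NCI_HCI_MAX_PIPES. A standalone sketch of that invariant (plain C, illustrative names only):

#include <stdio.h>

#define NCI_HCI_MAX_PIPES 128           /* number of pipes, not highest id */

struct pipe_entry { int gate; };
static struct pipe_entry pipes[NCI_HCI_MAX_PIPES];

static struct pipe_entry *pipe_lookup(unsigned int id)
{
        if (id >= NCI_HCI_MAX_PIPES)    /* reject out-of-range ids */
                return NULL;
        return &pipes[id];
}

int main(void)
{
        printf("id 127 -> %p\n", (void *)pipe_lookup(127)); /* last valid */
        printf("id 128 -> %p\n", (void *)pipe_lookup(128)); /* rejected */
        return 0;
}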
As the code is now, there is potential for an out of bounds array access: net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one? 'ndev->hci_dev->pipes[pipe]' '0-127 == 127' Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 87499b6b35d6d..df5c69db68afc 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -166,7 +166,7 @@ struct nci_conn_info { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ -#define NCI_HCI_MAX_PIPES 127 +#define NCI_HCI_MAX_PIPES 128 struct nci_hci_gate { u8 gate; -- GitLab From 5861381d486601430cccf64849bd0a226154bc0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2019 23:18:01 +0100 Subject: [PATCH 0717/1861] PM / arch: x86: Rework the MSR_IA32_ENERGY_PERF_BIAS handling The current handling of MSR_IA32_ENERGY_PERF_BIAS in the kernel is problematic, because it may cause changes made by user space to that MSR (with the help of the x86_energy_perf_policy tool, for example) to be lost every time a CPU goes offline and then back online as well as during system-wide power management transitions into sleep states and back into the working state. The first problem is that if the current EPB value for a CPU going online is 0 ('performance'), the kernel will change it to 6 ('normal') regardless of whether or not this is the first bring-up of that CPU. That also happens during system-wide resume from sleep states (including, but not limited to, hibernation). However, the EPB may have been adjusted by user space this way and the kernel should not blindly override that setting. The second problem is that if the platform firmware resets the EPB values for any CPUs during system-wide resume from a sleep state, the kernel will not restore their previous EPB values that may have been set by user space before the preceding system-wide suspend transition. Again, that behavior may at least be confusing from the user space perspective. In order to address these issues, rework the handling of MSR_IA32_ENERGY_PERF_BIAS so that the EPB value is saved on CPU offline and restored on CPU online as well as (for the boot CPU) during the syscore stages of system-wide suspend and resume transitions, respectively. However, retain the policy by which the EPB is set to 6 ('normal') on the first bring-up of each CPU if its initial value is 0, based on the observation that 0 may mean 'not initialized' just as well as 'performance' in that case. While at it, move the MSR_IA32_ENERGY_PERF_BIAS handling code into a separate file and document it in Documentation/admin-guide. Fixes: abe48b108247 (x86, intel, power: Initialize MSR_IA32_ENERGY_PERF_BIAS) Fixes: b51ef52df71c (x86/cpu: Restore MSR_IA32_ENERGY_PERF_BIAS after resume) Reported-by: Thomas Renninger Signed-off-by: Rafael J.
Wysocki Reviewed-by: Hannes Reinecke Acked-by: Borislav Petkov Acked-by: Thomas Gleixner --- Documentation/admin-guide/pm/intel_epb.rst | 6 + .../admin-guide/pm/working-state.rst | 1 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 17 --- arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/intel.c | 34 ----- arch/x86/kernel/cpu/intel_epb.c | 131 ++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 8 files changed, 140 insertions(+), 53 deletions(-) create mode 100644 Documentation/admin-guide/pm/intel_epb.rst create mode 100644 arch/x86/kernel/cpu/intel_epb.c diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst new file mode 100644 index 0000000000000..e9cfa7ec5420d --- /dev/null +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -0,0 +1,6 @@ +====================================== +Intel Performance and Energy Bias Hint +====================================== + +.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c + :doc: overview diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index b6cef9b5e961e..beb004d3632b4 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -8,3 +8,4 @@ Working-State Power Management cpuidle cpufreq intel_pstate + intel_epb diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfd24f9f76144..1796d2bdcaaa2 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659a..5e37dfa4d9df2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1864,23 +1864,6 @@ void cpu_init(void) } #endif -static void bsp_resume(void) -{ - if (this_cpu->c_bsp_resume) - this_cpu->c_bsp_resume(&boot_cpu_data); -} - -static struct syscore_ops cpu_syscore_ops = { - .resume = bsp_resume, -}; - -static int __init init_cpu_syscore(void) -{ - register_syscore_ops(&cpu_syscore_ops); - return 0; -} -core_initcall(init_cpu_syscore); - /* * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5eb946b9a9f36..c0e2407abdd6a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -14,7 +14,6 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); - void (*c_bsp_resume)(struct cpuinfo_x86 *); int c_x86_vendor; #ifdef CONFIG_X86_32 /* Optional vendor specific routine to obtain the cache size. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fc3c07fe7df58..f17c1a714779b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -596,36 +596,6 @@ detect_keyid_bits: c->x86_phys_bits -= keyid_bits; } -static void init_intel_energy_perf(struct cpuinfo_x86 *c) -{ - u64 epb; - - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized. 
- * (x86_energy_perf_policy(8) is available to change it at run-time.) - */ - if (!cpu_has(c, X86_FEATURE_EPB)) - return; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) - return; - - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); -} - -static void intel_bsp_resume(struct cpuinfo_x86 *c) -{ - /* - * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, - * so reinitialize it properly like during bootup: - */ - init_intel_energy_perf(c); -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); - init_intel_energy_perf(c); - init_intel_misc_features(c); } @@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = { .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, - .c_bsp_resume = intel_bsp_resume, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); - diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c new file mode 100644 index 0000000000000..8d53cc88bd229 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Performance and Energy Bias Hint support. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Rafael J. Wysocki + */ + +#include +#include +#include + +#include +#include + +/** + * DOC: overview + * + * The Performance and Energy Bias Hint (EPB) allows software to specify its + * preference with respect to the power-performance tradeoffs present in the + * processor. Generally, the EPB is expected to be set by user space through + * the generic MSR interface (with the help of the x86_energy_perf_policy tool), + * but there are two reasons for the kernel to touch it. + * + * First, there are systems where the platform firmware resets the EPB during + * system-wide transitions from sleep states back into the working state + * effectively causing the previous EPB updates by user space to be lost. + * Thus the kernel needs to save the current EPB values for all CPUs during + * system-wide transitions to sleep states and restore them on the way back to + * the working state. That can be achieved by saving EPB for secondary CPUs + * when they are taken offline during transitions into system sleep states and + * for the boot CPU in a syscore suspend operation, so that it can be restored + * for the boot CPU in a syscore resume operation and for the other CPUs when + * they are brought back online. However, CPUs that are already offline when + * a system-wide PM transition is started are not taken offline again, but their + * EPB values may still be reset by the platform firmware during the transition, + * so in fact it is necessary to save the EPB of any CPU taken offline and to + * restore it when the given CPU goes back online at all times. + * + * Second, on many systems the initial EPB value coming from the platform + * firmware is 0 ('performance') and at least on some of them that is because + * the platform firmware does not initialize EPB at all with the assumption that + * the OS will do that anyway. 
That sometimes is problematic, as it may cause + * the system battery to drain too fast, for example, so it is better to adjust + * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the + * kernel changes it to 6 ('normal'). + */ + +static DEFINE_PER_CPU(u8, saved_epb); + +#define EPB_MASK 0x0fULL +#define EPB_SAVED 0x10ULL + +static int intel_epb_save(void) +{ + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + /* + * Ensure that saved_epb will always be nonzero after this write even if + * the EPB value read from the MSR is 0. + */ + this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); + + return 0; +} + +static void intel_epb_restore(void) +{ + u64 val = this_cpu_read(saved_epb); + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + if (val) { + val &= EPB_MASK; + } else { + /* + * Because intel_epb_save() has not run for the current CPU yet, + * it is going online for the first time, so if its EPB value is + * 0 ('performance') at this point, assume that it has not been + * initialized by the platform firmware and set it to 6 + * ('normal'). + */ + val = epb & EPB_MASK; + if (val == ENERGY_PERF_BIAS_PERFORMANCE) { + val = ENERGY_PERF_BIAS_NORMAL; + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + } + } + wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); +} + +static struct syscore_ops intel_epb_syscore_ops = { + .suspend = intel_epb_save, + .resume = intel_epb_restore, +}; + +static int intel_epb_online(unsigned int cpu) +{ + intel_epb_restore(); + return 0; +} + +static int intel_epb_offline(unsigned int cpu) +{ + return intel_epb_save(); +} + +static __init int intel_epb_init(void) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_EPB)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, + "x86/intel/epb:online", intel_epb_online, + intel_epb_offline); + if (ret < 0) + goto err_out_online; + + register_syscore_ops(&intel_epb_syscore_ops); + return 0; + +err_out_online: + cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); + return ret; +} +subsys_initcall(intel_epb_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e78281d07b70b..dbfdd0fadbeff 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, + CPUHP_AP_X86_INTEL_EPB_ONLINE, CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE, -- GitLab From b9c273babce791cf228fc466577f55056a699f9c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2019 23:20:17 +0100 Subject: [PATCH 0718/1861] PM / arch: x86: MSR_IA32_ENERGY_PERF_BIAS sysfs interface The Performance and Energy Bias Hint (EPB) is expected to be set by user space through the generic MSR interface, but that interface is not particularly nice and there are security concerns regarding it, so it is not always available. For this reason, add a sysfs interface for reading and updating the EPB, in the form of a new attribute, energy_perf_bias, located under /sys/devices/system/cpu/cpu#/power/ for online CPUs that support the EPB feature. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Hannes Reinecke Acked-by: Borislav Petkov --- .../ABI/testing/sysfs-devices-system-cpu | 18 ++++ Documentation/admin-guide/pm/intel_epb.rst | 27 ++++++ arch/x86/kernel/cpu/intel_epb.c | 93 ++++++++++++++++++- 3 files changed, 134 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 9605dbd4b5b59..7f4af7da3fbcd 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -518,3 +518,21 @@ Description: Control Symetric Multi Threading (SMT) If control status is "forceoff" or "notsupported" writes are rejected. + +What: /sys/devices/system/cpu/cpu#/power/energy_perf_bias +Date: March 2019 +Contact: linux-pm@vger.kernel.org +Description: Intel Energy and Performance Bias Hint (EPB) + + EPB for the given CPU in a sliding scale 0 - 15, where a value + of 0 corresponds to a hint preference for highest performance + and a value of 15 corresponds to the maximum energy savings. + + In order to change the EPB value for the CPU, write either + a number in the 0 - 15 sliding scale above, or one of the + strings: "performance", "balance-performance", "normal", + "balance-power", "power" (that represent values reflected by + their meaning), to this attribute. + + This attribute is present for all online CPUs supporting the + Intel EPB feature. diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst index e9cfa7ec5420d..d100849edfc4e 100644 --- a/Documentation/admin-guide/pm/intel_epb.rst +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -4,3 +4,30 @@ Intel Performance and Energy Bias Hint .. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c :doc: overview + +Intel Performance and Energy Bias Attribute in ``sysfs`` +======================================================== + +The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU +can be checked or updated through a ``sysfs`` attribute (file) under +:file:`/sys/devices/system/cpu/cpu/power/`, where the CPU number ```` +is allocated at the system initialization time: + +``energy_perf_bias`` + Shows the current EPB value for the CPU in a sliding scale 0 - 15, where + a value of 0 corresponds to a hint preference for highest performance + and a value of 15 corresponds to the maximum energy savings. + + In order to update the EPB value for the CPU, this attribute can be + written to, either with a number in the 0 - 15 sliding scale above, or + with one of the strings: "performance", "balance-performance", "normal", + "balance-power", "power" that represent values reflected by their + meaning. + + This attribute is present for all online CPUs supporting the EPB + feature. + +Note that while the EPB interface to the processor is defined at the logical CPU +level, the physical register backing it may be shared by multiple CPUs (for +example, SMT siblings or cores in one package). For this reason, updating the +EPB value for one CPU may cause the EPB values for other CPUs to change. 
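From user space the new attribute behaves like any other sysfs file. A minimal C sketch of reading and updating it follows; the cpu0 path and the chosen policy string are assumptions for illustration, and the write requires root:

#include <stdio.h>

#define EPB_PATH "/sys/devices/system/cpu/cpu0/power/energy_perf_bias"

int main(void)
{
        char buf[32];
        FILE *f = fopen(EPB_PATH, "r");

        if (!f) {
                perror(EPB_PATH);       /* no EPB support on this kernel */
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("cpu0 EPB: %s", buf);    /* a value in 0 - 15 */
        fclose(f);

        f = fopen(EPB_PATH, "w");       /* needs root */
        if (f) {
                fputs("balance-performance", f);  /* or e.g. "6" */
                fclose(f);
        }
        return 0;
}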
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index 8d53cc88bd229..f4dd73396f288 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -9,8 +9,12 @@ */ #include +#include +#include #include +#include #include +#include #include #include @@ -20,9 +24,9 @@ * * The Performance and Energy Bias Hint (EPB) allows software to specify its * preference with respect to the power-performance tradeoffs present in the - * processor. Generally, the EPB is expected to be set by user space through - * the generic MSR interface (with the help of the x86_energy_perf_policy tool), - * but there are two reasons for the kernel to touch it. + * processor. Generally, the EPB is expected to be set by user space (directly + * via sysfs or with the help of the x86_energy_perf_policy tool), but there are + * two reasons for the kernel to update it. * * First, there are systems where the platform firmware resets the EPB during * system-wide transitions from sleep states back into the working state @@ -52,6 +56,7 @@ static DEFINE_PER_CPU(u8, saved_epb); #define EPB_MASK 0x0fULL #define EPB_SAVED 0x10ULL +#define MAX_EPB EPB_MASK static int intel_epb_save(void) { @@ -97,15 +102,95 @@ static struct syscore_ops intel_epb_syscore_ops = { .resume = intel_epb_restore, }; +static const char * const energy_perf_strings[] = { + "performance", + "balance-performance", + "normal", + "balance-power", + "power" +}; +static const u8 energ_perf_values[] = { + ENERGY_PERF_BIAS_PERFORMANCE, + ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, + ENERGY_PERF_BIAS_NORMAL, + ENERGY_PERF_BIAS_BALANCE_POWERSAVE, + ENERGY_PERF_BIAS_POWERSAVE +}; + +static ssize_t energy_perf_bias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned int cpu = dev->id; + u64 epb; + int ret; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + return sprintf(buf, "%llu\n", epb); +} + +static ssize_t energy_perf_bias_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int cpu = dev->id; + u64 epb, val; + int ret; + + ret = __sysfs_match_string(energy_perf_strings, + ARRAY_SIZE(energy_perf_strings), buf); + if (ret >= 0) + val = energ_perf_values[ret]; + else if (kstrtou64(buf, 0, &val) || val > MAX_EPB) + return -EINVAL; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, + (epb & ~EPB_MASK) | val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR_RW(energy_perf_bias); + +static struct attribute *intel_epb_attrs[] = { + &dev_attr_energy_perf_bias.attr, + NULL +}; + +static const struct attribute_group intel_epb_attr_group = { + .name = power_group_name, + .attrs = intel_epb_attrs +}; + static int intel_epb_online(unsigned int cpu) { + struct device *cpu_dev = get_cpu_device(cpu); + intel_epb_restore(); + if (!cpuhp_tasks_frozen) + sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group); + return 0; } static int intel_epb_offline(unsigned int cpu) { - return intel_epb_save(); + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpuhp_tasks_frozen) + sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group); + + intel_epb_save(); + return 0; } static __init int intel_epb_init(void) -- GitLab From dd9a994fc68d196a052b73747e3366c57d14a09e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 4 Apr 2019 12:20:05 +0000 Subject: [PATCH 0719/1861] powerpc/vdso32: 
fix CLOCK_MONOTONIC on PPC64 Commit b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038") changed the type of wtom_clock_sec to s64 on PPC64. Therefore, VDSO32 needs to read it with a 4-byte shift in order to retrieve the lower part of it. Fixes: b5b4453e7912 ("powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038") Reported-by: Christian Zigotzky Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso32/gettimeofday.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 1e0bc5955a400..afd516b572f86 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -98,7 +98,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * can be used, r7 contains NSEC_PER_SEC. */ - lwz r5,WTOM_CLOCK_SEC(r9) + lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) lwz r6,WTOM_CLOCK_NSEC(r9) /* We now have our offset in r5,r6. We create a fake dependency -- GitLab From 9dc6488e84b0f64df17672271664752488cd6a25 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 4 Apr 2019 10:58:01 +0800 Subject: [PATCH 0720/1861] libnvdimm/pmem: fix a possible OOB access when reading and writing pmem If the offset is not zero and the length is bigger than PAGE_SIZE, this will cause an out-of-bounds access to page memory. Fixes: 98cc093cba1e ("block, THP: make block_device_operations.rw_page support THP") Co-developed-by: Liang ZhiCheng Signed-off-by: Liang ZhiCheng Signed-off-by: Li RongQing Reviewed-by: Ira Weiny Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bc2f700feef8a..0279eb1da3ef5 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -113,13 +113,13 @@ static void write_pmem(void *pmem_addr, struct page *page, while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); memcpy_flushcache(pmem_addr, mem + off, chunk); kunmap_atomic(mem); len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } } @@ -132,7 +132,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, while (len) { mem = kmap_atomic(page); - chunk = min_t(unsigned int, len, PAGE_SIZE); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); if (rem) @@ -140,7 +140,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, len -= chunk; off = 0; page++; - pmem_addr += PAGE_SIZE; + pmem_addr += chunk; } return BLK_STS_OK; } -- GitLab From ac0722f23ff5bc1b15e268564a4d56d35cd4a1b5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 18 Mar 2019 11:05:21 +0100 Subject: [PATCH 0721/1861] dt-bindings: cpu: Fix JSON schema Commit fd73403a4862 ("dt-bindings: arm: Add SMP enable-method for Milbeaut") added support for a new cpu enable-method, but did so using tabs to indent. This is, however, invalid syntax in YAML, and resulted in a failure when trying to use that schema for validation. Use spaces instead of tabs to indent to fix this.
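Stepping back to the libnvdimm/pmem change above, the chunking it fixes can be modeled in isolation: with a nonzero offset into the first page, each pass may copy at most PAGE_SIZE - off bytes, and the flat destination pointer must advance by the chunk actually copied. A userspace sketch with the copy reduced to a printout (PAGE_SIZE hardcoded as an assumption):

#include <stdio.h>

#define PAGE_SIZE 4096u

static void copy_chunks(unsigned int off, unsigned int len)
{
        unsigned long pmem_addr = 0;    /* model of the flat destination */

        while (len) {
                unsigned int chunk = len < PAGE_SIZE - off ?
                                     len : PAGE_SIZE - off;

                printf("copy %4u bytes at page offset %4u, pmem +%lu\n",
                       chunk, off, pmem_addr);
                len -= chunk;
                off = 0;             /* only the first page is offset */
                pmem_addr += chunk;  /* old code wrongly added PAGE_SIZE */
        }
}

int main(void)
{
        copy_chunks(512, 8192);      /* 3584 + 4096 + 512, not 2 x 4096 */
        return 0;
}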
Fixes: fd73403a4862 ("dt-bindings: arm: Add SMP enable-method for Milbeaut") Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Acked-by: Sugaya Taichi Signed-off-by: Olof Johansson --- Documentation/devicetree/bindings/arm/cpus.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 365dcf384d739..82dd7582e9454 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -228,7 +228,7 @@ patternProperties: - renesas,r9a06g032-smp - rockchip,rk3036-smp - rockchip,rk3066-smp - - socionext,milbeaut-m10v-smp + - socionext,milbeaut-m10v-smp - ste,dbx500-smp cpu-release-addr: -- GitLab From fbe8758f931ff5468aaeb4b304fc3edb70c908d6 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sun, 7 Apr 2019 15:18:41 -0700 Subject: [PATCH 0722/1861] Revert "ARM: dts: nomadik: Fix polarity of SPI CS" This reverts commit fa9463564e77067df81b0b8dec91adbbbc47bfb4. Per Linus Walleij: Dear ARM SoC maintainers, can you please revert this patch. It was the wrong solution to the wrong problem, and I must have acted in stress. Andrey fixed the real bug in a proper way in these commits: commit e5545c94e43b8f6599ffc01df8d1aedf18ee912a "gpio: of: Check propname before applying "cs-gpios" quirks" commit 7ce40277bf848391705011ba37eac2e377cbd9e6 "gpio: of: Check for "spi-cs-high" in child instead of parent node" Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-nomadik-nhk15.dts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts index f2f6558a00f18..04066f9cb8a31 100644 --- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts +++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts @@ -213,13 +213,12 @@ gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>; /* - * This chipselect is active high. Just setting the flags - * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings, - * it will be ignored, only the special "spi-cs-high" flag - * really counts. + * It's not actually active high, but the frameworks assume + * the polarity of the passed-in GPIO is "normal" (active + * high) then actively drives the line low to select the + * chip. */ cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; - spi-cs-high; num-chipselects = <1>; /* -- GitLab From cd92d74d67c811dc22544430b9ac3029f5bd64c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:42 +0100 Subject: [PATCH 0723/1861] ARM: orion: don't use 64-bit DMA masks clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/plat-orion/common.c:625:29: error: shift count >= width of type [-Werror,-Wshift-count-overflow] .coherent_dma_mask = DMA_BIT_MASK(64), ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) The ones in orion shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning.
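The macro's shape makes both the warning and the fix easy to see standalone; the macro body below is copied from the diagnostic quoted above, the rest is illustration:

#include <stdio.h>

#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* clang still sees the dead 1ULL << 64 arm in the initializer and warns */
static unsigned long long mask64 = DMA_BIT_MASK(64);
/* what these DMA engines can actually address, warning-free */
static unsigned long long mask32 = DMA_BIT_MASK(32);

int main(void)
{
        printf("mask64 = %#llx\n", mask64);
        printf("mask32 = %#llx\n", mask32);
        return 0;
}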
Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/plat-orion/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index a6c81ce00f520..8647cb80a93bd 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -622,7 +622,7 @@ static struct platform_device orion_xor0_shared = { .resource = orion_xor0_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor0_pdata, }, }; @@ -683,7 +683,7 @@ static struct platform_device orion_xor1_shared = { .resource = orion_xor1_shared_resources, .dev = { .dma_mask = &orion_xor_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &orion_xor1_pdata, }, }; -- GitLab From 2125801ccce19249708ca3245d48998e70569ab8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 16:50:43 +0100 Subject: [PATCH 0724/1861] ARM: iop: don't use 64-bit DMA masks clang warns about statically defined DMA masks from the DMA_BIT_MASK macro with length 64: arch/arm/mach-iop13xx/setup.c:303:35: error: shift count >= width of type [-Werror,-Wshift-count-overflow] static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); ^~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:141:54: note: expanded from macro 'DMA_BIT_MASK' #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) ^ ~~~ The ones in iop shouldn't really be 64 bit masks, so changing them to what the driver can support avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/mach-iop13xx/setup.c | 8 ++++---- arch/arm/mach-iop13xx/tpmi.c | 10 +++++----- arch/arm/plat-iop/adma.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 53c316f7301e6..fe4932fda01d7 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -300,7 +300,7 @@ static struct resource iop13xx_adma_2_resources[] = { } }; -static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64); +static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(32); static struct iop_adma_platform_data iop13xx_adma_0_data = { .hw_id = 0, .pool_size = PAGE_SIZE, @@ -324,7 +324,7 @@ static struct platform_device iop13xx_adma_0_channel = { .resource = iop13xx_adma_0_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_0_data, }, }; @@ -336,7 +336,7 @@ static struct platform_device iop13xx_adma_1_channel = { .resource = iop13xx_adma_1_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_1_data, }, }; @@ -348,7 +348,7 @@ static struct platform_device iop13xx_adma_2_channel = { .resource = iop13xx_adma_2_resources, .dev = { .dma_mask = &iop13xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop13xx_adma_2_data, }, }; diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c index db511ec2b1df6..116feb6b261eb 100644 --- a/arch/arm/mach-iop13xx/tpmi.c +++ b/arch/arm/mach-iop13xx/tpmi.c @@ -152,7 +152,7 @@ static struct resource iop13xx_tpmi_3_resources[] = { } }; -u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64); +u64 iop13xx_tpmi_mask
= DMA_BIT_MASK(32); static struct platform_device iop13xx_tpmi_0_device = { .name = "iop-tpmi", .id = 0, @@ -160,7 +160,7 @@ static struct platform_device iop13xx_tpmi_0_device = { .resource = iop13xx_tpmi_0_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -171,7 +171,7 @@ static struct platform_device iop13xx_tpmi_1_device = { .resource = iop13xx_tpmi_1_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -182,7 +182,7 @@ static struct platform_device iop13xx_tpmi_2_device = { .resource = iop13xx_tpmi_2_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; @@ -193,7 +193,7 @@ static struct platform_device iop13xx_tpmi_3_device = { .resource = iop13xx_tpmi_3_resources, .dev = { .dma_mask = &iop13xx_tpmi_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), }, }; diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index a4d1f8de3b5b2..d9612221e4848 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -143,7 +143,7 @@ struct platform_device iop3xx_dma_0_channel = { .resource = iop3xx_dma_0_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_0_data, }, }; @@ -155,7 +155,7 @@ struct platform_device iop3xx_dma_1_channel = { .resource = iop3xx_dma_1_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_dma_1_data, }, }; @@ -167,7 +167,7 @@ struct platform_device iop3xx_aau_channel = { .resource = iop3xx_aau_resources, .dev = { .dma_mask = &iop3xx_adma_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = (void *) &iop3xx_aau_data, }, }; -- GitLab From 9a8f32038a74cb800e9649afbf4b3dba2b7d6539 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2019 22:19:16 +0100 Subject: [PATCH 0725/1861] ARM: milbeaut: fix build with !CONFIG_HOTPLUG_CPU When HOTPLUG_CPU is disabled, some fields in the smp operations are not available or needed: arch/arm/mach-milbeaut/platsmp.c:90:3: error: field designator 'cpu_die' does not refer to any field in type 'struct smp_operations' .cpu_die = m10v_cpu_die, ^ arch/arm/mach-milbeaut/platsmp.c:91:3: error: field designator 'cpu_kill' does not refer to any field in type 'struct smp_operations' .cpu_kill = m10v_cpu_kill, ^ Hide them in an #ifdef like the other platforms do. 
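The failure mode reduces to a designated initializer naming a field that was compiled out. A minimal standalone model, where CONFIG_HOTPLUG_CPU is an ordinary preprocessor define standing in for the Kconfig option:

#include <stdio.h>

struct smp_ops {
        void (*boot)(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
        void (*cpu_die)(unsigned int cpu);      /* absent when disabled */
#endif
};

static void boot(unsigned int cpu) { printf("boot cpu%u\n", cpu); }
#ifdef CONFIG_HOTPLUG_CPU
static void die(unsigned int cpu) { printf("die cpu%u\n", cpu); }
#endif

static struct smp_ops ops = {
        .boot = boot,
#ifdef CONFIG_HOTPLUG_CPU
        /* without this guard: "does not refer to any field" */
        .cpu_die = die,
#endif
};

int main(void)
{
        ops.boot(0);
        return 0;
}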
Fixes: 9fb29c734f9e ("ARM: milbeaut: Add basic support for Milbeaut m10v SoC") Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/mach-milbeaut/platsmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-milbeaut/platsmp.c b/arch/arm/mach-milbeaut/platsmp.c index 591543c81399b..3ea880f5fcb73 100644 --- a/arch/arm/mach-milbeaut/platsmp.c +++ b/arch/arm/mach-milbeaut/platsmp.c @@ -65,6 +65,7 @@ static void m10v_smp_init(unsigned int max_cpus) writel(KERNEL_UNBOOT_FLAG, m10v_smp_base + cpu * 4); } +#ifdef CONFIG_HOTPLUG_CPU static void m10v_cpu_die(unsigned int l_cpu) { gic_cpu_if_down(0); @@ -83,12 +84,15 @@ static int m10v_cpu_kill(unsigned int l_cpu) return 1; } +#endif static struct smp_operations m10v_smp_ops __initdata = { .smp_prepare_cpus = m10v_smp_init, .smp_boot_secondary = m10v_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = m10v_cpu_die, .cpu_kill = m10v_cpu_kill, +#endif }; CPU_METHOD_OF_DECLARE(m10v_smp, "socionext,milbeaut-m10v-smp", &m10v_smp_ops); -- GitLab From 15ade5d2e7775667cf191cf2f94327a4889f8b9d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Apr 2019 14:09:59 -1000 Subject: [PATCH 0726/1861] Linux 5.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 026fbc450906a..15c8251d4d5ed 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Shy Crocodile # *DOCUMENTATION* -- GitLab From b959ecf8f953701a19970e5db7e427c05143f303 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 5 Apr 2019 14:20:24 +0200 Subject: [PATCH 0727/1861] selftests: add a tc matchall test case This is a follow up of the commit 0db6f8befc32 ("net/sched: fix ->get helper of the matchall cls"). To test it: $ cd tools/testing/selftests/tc-testing $ ln -s ../plugin-lib/nsPlugin.py plugins/20-nsPlugin.py $ ./tdc.py -n -e 2638 Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- .../tc-testing/tc-tests/filters/tests.json | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 99a5ffca1088a..2d096b2abf2c9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -18,6 +18,26 @@ "$TC qdisc del dev $DEV1 ingress" ] }, + { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, { "id": "d052", "name": "Add 1M filters with the same action", -- GitLab From 196a66275520ffc27513c56ecc06a2d9450fd12f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Apr 2019 08:14:25 +0100 Subject: [PATCH 0728/1861] drm/i915/gvt: Annotate iomem usage Fix the sparse warning for blithely using iomem with normal memcpy: drivers/gpu/drm/i915/gvt/kvmgt.c:916:21: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/kvmgt.c:916:21: expected void *aperture_va drivers/gpu/drm/i915/gvt/kvmgt.c:916:21: got void [noderef] * drivers/gpu/drm/i915/gvt/kvmgt.c:927:26: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915/gvt/kvmgt.c:927:26: expected void [noderef] *vaddr drivers/gpu/drm/i915/gvt/kvmgt.c:927:26: got void *aperture_va Fixes: d480b28a41a6 ("drm/i915/gvt: Fix aperture read/write emulation when enable x-no-mmap=on") Reviewed-by: Zhenyu Wang Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Changbin Du Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index d5fcc447d22f0..a68addf95c230 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -905,7 +905,7 @@ static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off) static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, void *buf, unsigned long count, bool is_write) { - void *aperture_va; + void __iomem *aperture_va; if (!intel_vgpu_in_aperture(vgpu, off) || !intel_vgpu_in_aperture(vgpu, off + count)) { @@ -920,9 +920,9 @@ static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, return -EIO; if (is_write) - memcpy(aperture_va + offset_in_page(off), buf, count); + memcpy_toio(aperture_va + offset_in_page(off), buf, count); else - memcpy(buf, aperture_va + offset_in_page(off), count); + memcpy_fromio(buf, aperture_va + offset_in_page(off), count); io_mapping_unmap(aperture_va); -- GitLab From 968a85b19d0a79dd8ed85f39e23eacd34b503e72 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Apr 2019 08:30:56 +0100 Subject: [PATCH 0729/1861] drm/i915/gvt: Prevent use-after-free in ppgtt_free_all_spt() ppgtt_free_all_spt() iterates the radixtree as it is deleting it, forgoing all protection against the leaves being freed in the process (leaving the iter pointing into the void). 
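The underlying pattern, detach every element from the index first and free only afterwards, can be shown with a plain C model in which a linked list stands in for the radix tree (illustrative only, not GVT code):

#include <stdlib.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static void free_all(struct node **head)
{
        struct node *victims = *head, *n;

        *head = NULL;            /* detach everything first ... */
        while ((n = victims)) {  /* ... then free from the private list */
                victims = n->next;
                printf("freeing %d\n", n->id);
                free(n);
        }
}

int main(void)
{
        struct node *head = NULL;

        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                n->id = i;
                n->next = head;
                head = n;
        }
        free_all(&head);
        return 0;
}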
A minimal fix seems to be to use the available post_shadow_list to decompose the tree into a list prior to destroying the radixtree. Alerted by the sparse warnings: drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void [noderef] ** drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void [noderef] ** drivers/gpu/drm/i915/gvt/gtt.c:758:45: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:758:45: expected void [noderef] **slot drivers/gpu/drm/i915/gvt/gtt.c:758:45: got void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void [noderef] **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915/gvt/gtt.c:757:9: expected void **slot drivers/gpu/drm/i915/gvt/gtt.c:757:9: got void [noderef] ** This would also have warned loudly if run through CI, given the invalid RCU dereferences. Fixes: b6c126a39345 ("drm/i915/gvt: Manage shadow pages with radix tree") Reviewed-by: Zhenyu Wang Signed-off-by: Chris Wilson Cc: Changbin Du Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index cf133ef038735..9814773882ec2 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -750,14 +750,20 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) { - struct intel_vgpu_ppgtt_spt *spt; + struct intel_vgpu_ppgtt_spt *spt, *spn; struct radix_tree_iter iter; - void **slot; + LIST_HEAD(all_spt); + void __rcu **slot; + rcu_read_lock(); radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { spt = radix_tree_deref_slot(slot); - ppgtt_free_spt(spt); + list_move(&spt->post_shadow_list, &all_spt); } + rcu_read_unlock(); + + list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list) + ppgtt_free_spt(spt); } static int ppgtt_handle_guest_write_page_table_bytes( -- GitLab From fcf88917dd435c6a4cb2830cb086ee58605a1d85 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 6 Apr 2019 18:59:01 -0400 Subject: [PATCH 0730/1861] slab: fix a crash by reading /proc/slab_allocators The commit 510ded33e075 ("slab: implement slab_root_caches list") changes the name of the list node within "struct kmem_cache" from "list" to "root_caches_node", but leaks_show() still uses "list", which causes a crash when reading /proc/slab_allocators. You need to have CONFIG_SLAB=y and CONFIG_MEMCG=y to see the problem, because without MEMCG all slab caches are root caches, and the "list" node happens to be the right one. Fixes: 510ded33e075 ("slab: implement slab_root_caches list") Signed-off-by: Qian Cai Reviewed-by: Tobin C.
Harding Cc: Tejun Heo Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 329bfe67f2cae..47a380a486eef 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4308,7 +4308,8 @@ static void show_symbol(struct seq_file *m, unsigned long address) static int leaks_show(struct seq_file *m, void *p) { - struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); + struct kmem_cache *cachep = list_entry(p, struct kmem_cache, + root_caches_node); struct page *page; struct kmem_cache_node *n; const char *name; -- GitLab From 5055376a3b44c4021de8830c9157f086a97731df Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 8 Apr 2019 10:04:20 +0800 Subject: [PATCH 0731/1861] net: vrf: Fix ping failure when vrf mtu is set to 0 When the mtu of a vrf device is set to 0, ping fails. So I think we should limit the vrf mtu to a reasonable range to solve this problem. I set dev->min_mtu to IPV6_MIN_MTU, so it works for both ipv4 and ipv6. And leaving dev->max_mtu at 0 would be confusing, so I set dev->max_mtu to ETH_MAX_MTU. Here are the steps to reproduce: 1. Configure the vrf interface and set mtu to 0: 3: enp4s0: mtu 1500 qdisc fq_codel master vrf1 state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff 2. Ping the peer: 3: enp4s0: mtu 1500 qdisc fq_codel master vrf1 state UP group default qlen 1000 link/ether 52:54:00:9e:dd:c1 brd ff:ff:ff:ff:ff:ff inet 10.0.0.1/16 scope global enp4s0 valid_lft forever preferred_lft forever connect: Network is unreachable 3. Set mtu back to the default value; ping works: PING 10.0.0.2 (10.0.0.2) 56(84) bytes of data. 64 bytes from 10.0.0.2: icmp_seq=1 ttl=64 time=1.88 ms Fixes: ad49bc6361ca2 ("net: vrf: remove MTU limits for vrf device") Signed-off-by: Miaohe Lin Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6d1a1abbed27e..cd15c32b2e436 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1275,8 +1275,12 @@ static void vrf_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; - dev->min_mtu = 0; - dev->max_mtu = 0; + /* VRF devices do not care about MTU, but if the MTU is set + * too low then the ipv4 and ipv6 protocols are disabled + * which breaks networking. + */ + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], -- GitLab From 9b39b013037fbfa8d4b999345d9e904d8a336fc2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Apr 2019 13:17:13 +1000 Subject: [PATCH 0732/1861] drm/udl: add a release method and delay modeset teardown If we unplug a udl device, the usb callback will deinit the mode_config struct; however, userspace will still have an open file descriptor and a framebuffer on that device. When userspace closes the fd, we'll oops because it'll try to look stuff up in the object idr which we've destroyed. This punts destroying the mode objects until release time instead.
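The resulting ordering can be modeled with a bare reference count; the two-reference setup and the function names below are assumptions, not udl internals:

#include <stdio.h>

struct dev { int refs; int objects_alive; };

static void driver_unload(struct dev *d)    /* usb disconnect path */
{
        printf("unload: stop hw, keep %d mode objects\n", d->objects_alive);
}

static void driver_release(struct dev *d)   /* last reference dropped */
{
        d->objects_alive = 0;
        printf("release: mode objects destroyed\n");
}

static void put_dev(struct dev *d)
{
        if (--d->refs == 0)
                driver_release(d);
}

int main(void)
{
        struct dev d = { .refs = 2, .objects_alive = 4 }; /* core + open fd */

        driver_unload(&d);
        put_dev(&d);    /* driver core drops its reference at unplug */
        put_dev(&d);    /* userspace finally closes its fd */
        return 0;
}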
Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190405031715.5959-2-airlied@gmail.com --- drivers/gpu/drm/udl/udl_drv.c | 1 + drivers/gpu/drm/udl/udl_drv.h | 1 + drivers/gpu/drm/udl/udl_main.c | 8 +++++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 22cd2d13e272f..ff47f890e6ad8 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -52,6 +52,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, .load = udl_driver_load, .unload = udl_driver_unload, + .release = udl_driver_release, /* gem hooks */ .gem_free_object_unlocked = udl_gem_free_object, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index e9e9b1ff678ee..4ae67d882eae9 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -104,6 +104,7 @@ void udl_urb_completion(struct urb *urb); int udl_driver_load(struct drm_device *dev, unsigned long flags); void udl_driver_unload(struct drm_device *dev); +void udl_driver_release(struct drm_device *dev); int udl_fbdev_init(struct drm_device *dev); void udl_fbdev_cleanup(struct drm_device *dev); diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 9086d0d1b880d..1f8ef34ade243 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -379,6 +379,12 @@ void udl_driver_unload(struct drm_device *dev) udl_free_urb_list(dev); udl_fbdev_cleanup(dev); - udl_modeset_cleanup(dev); kfree(udl); } + +void udl_driver_release(struct drm_device *dev) +{ + udl_modeset_cleanup(dev); + drm_dev_fini(dev); + kfree(dev); +} -- GitLab From 54f8844e3f6cf898450a6c85f70fa997f0aa72b9 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Thu, 4 Apr 2019 19:48:33 -0700 Subject: [PATCH 0733/1861] ASoC: topology: Use the correct dobj to free enum control values and texts The control values and texts of the enum kcontrol associated with a widget need to be freed when the widget is removed. However, both struct snd_soc_dapm_widget and struct soc_enum contain a dobj member, which resulted in confusion. The existing code generates a null pointer dereference by attempting to free the values and texts from the dobj which belongs to the widget instead of the dobj belonging to the enum kcontrol. The suggested fix is to use the correct dobj member (se->dobj) of the enum kcontrol.
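A reduced model of the mix-up, with both structures shrunk to the one shared member name (all fields illustrative):

#include <stdio.h>

struct dobj { int nvalues; };

struct widget   { struct dobj dobj; };  /* never carries the enum data */
struct soc_enum { struct dobj dobj; };  /* owns dvalues and dtexts */

int main(void)
{
        struct widget w = { .dobj = { 0 } };
        struct soc_enum se = { .dobj = { .nvalues = 4 } };

        /* buggy cleanup consulted the widget's dobj, which is empty */
        printf("w.dobj.nvalues  = %d\n", w.dobj.nvalues);
        /* fixed cleanup uses the enum's own dobj to know what to free */
        printf("se.dobj.nvalues = %d\n", se.dobj.nvalues);
        return 0;
}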
Signed-off-by: Ranjani Sridharan Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 25fca7055464a..96852d2506193 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -482,10 +482,11 @@ static void remove_widget(struct snd_soc_component *comp, snd_ctl_remove(card, kcontrol); - kfree(dobj->control.dvalues); + /* free enum kcontrol's dvalues and dtexts */ + kfree(se->dobj.control.dvalues); for (j = 0; j < se->items; j++) - kfree(dobj->control.dtexts[j]); - kfree(dobj->control.dtexts); + kfree(se->dobj.control.dtexts[j]); + kfree(se->dobj.control.dtexts); kfree(se); kfree(w->kcontrol_news[i].name); -- GitLab From 17d3069ccf06970e2db3f7cbf4335f207524279e Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Fri, 5 Apr 2019 11:19:11 +0200 Subject: [PATCH 0734/1861] ASoC: stm32: fix sai driver name initialisation This patch fixes the overwriting of the sai driver structure, which results in a cpu dai name equal to NULL. Fixes: 3e086ed ("ASoC: stm32: add SAI driver") Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 55d802f51c155..83d8a7ac56f42 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1419,7 +1419,6 @@ static int stm32_sai_sub_dais_init(struct platform_device *pdev, if (!sai->cpu_dai_drv) return -ENOMEM; - sai->cpu_dai_drv->name = dev_name(&pdev->dev); if (STM_SAI_IS_PLAYBACK(sai)) { memcpy(sai->cpu_dai_drv, &stm32_sai_playback_dai, sizeof(stm32_sai_playback_dai)); @@ -1429,6 +1428,7 @@ static int stm32_sai_sub_dais_init(struct platform_device *pdev, sizeof(stm32_sai_capture_dai)); sai->cpu_dai_drv->capture.stream_name = sai->cpu_dai_drv->name; } + sai->cpu_dai_drv->name = dev_name(&pdev->dev); return 0; } -- GitLab From 678cce4019d746da6c680c48ba9e6d417803e127 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 31 Mar 2019 13:04:11 -0700 Subject: [PATCH 0735/1861] crypto: x86/poly1305 - fix overflow during partial reduction The x86_64 implementation of Poly1305 produces the wrong result on some inputs because poly1305_4block_avx2() incorrectly assumes that when partially reducing the accumulator, the bits carried from limb 'd4' to limb 'h0' fit in a 32-bit integer. This is true for poly1305-generic which processes only one block at a time. However, it's not true for the AVX2 implementation, which processes 4 blocks at a time and therefore can produce intermediate limbs about 4x larger. Fix it by making the relevant calculations use 64-bit arithmetic rather than 32-bit. Note that most of the carries already used 64-bit arithmetic, but the d4 -> h0 carry was different for some reason. To be safe I also made the same change to the corresponding SSE2 code, though that only operates on 1 or 2 blocks at a time. I don't think it's really needed for poly1305_block_sse2(), but it doesn't hurt because it's already x86_64 code. It *might* be needed for poly1305_2block_sse2(), but overflows aren't easy to reproduce there. This bug was originally detected by my patches that improve testmgr to fuzz algorithms against their generic implementation. But also add a test vector which reproduces it directly (in the AVX2 case).
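The arithmetic behind the fix can be checked in plain C. The old assembly computed the d4 -> h0 carry, (d4 >> 26) * 5, with 32-bit operations (the `lea (%eax,%eax,4),%eax` in the diff below), which silently truncates once the carry no longer fits in 32 bits. The value used here is an illustrative magnitude, not one taken from the bound analysis in the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* With 4 blocks accumulated before reduction, the unreduced
	 * limb d4 can grow large enough that its carry exceeds 32 bits
	 * (illustrative value): */
	uint64_t d4 = (uint64_t)0x123456789 << 26;
	uint64_t carry = d4 >> 26;		/* 0x123456789 > 2^32 */

	uint32_t bad  = (uint32_t)carry * 5;	/* 32-bit: truncates first */
	uint64_t good = carry * 5;		/* 64-bit: what the fix does */

	printf("carry*5: 32-bit=%u 64-bit=%llu\n",
	       bad, (unsigned long long)good);
	return 0;
}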
Fixes: b1ccc8f4b631 ("crypto: poly1305 - Add a four block AVX2 variant for x86_64") Fixes: c70f4abef07a ("crypto: poly1305 - Add a SSE2 SIMD variant for x86_64") Cc: # v4.3+ Cc: Martin Willi Cc: Jason A. Donenfeld Signed-off-by: Eric Biggers Reviewed-by: Martin Willi Signed-off-by: Herbert Xu --- arch/x86/crypto/poly1305-avx2-x86_64.S | 14 +++++--- arch/x86/crypto/poly1305-sse2-x86_64.S | 22 ++++++++----- crypto/testmgr.h | 44 +++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index 3b6e70d085da8..8457cdd47f751 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2) vpaddq t2,t1,t1 vmovq t1x,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index e6add74d78a59..6f0be7a869641 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx @@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2) paddq t2,t1 movq t1,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. 
+ # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/crypto/testmgr.h b/crypto/testmgr.h index f267633cf13ac..d18a37629f053 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -5634,7 +5634,49 @@ static const struct hash_testvec poly1305_tv_template[] = { .psize = 80, .digest = "\x13\x00\x00\x00\x00\x00\x00\x00" "\x00\x00\x00\x00\x00\x00\x00\x00", - }, + }, { /* Regression test for overflow in AVX2 implementation */ + .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff", + .psize = 300, + .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8" + "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1", + } }; /* NHPoly1305 test vectors from https://github.com/google/adiantum */ -- GitLab From b4ed6b51f356224c6c71540ed94087f7f09b84af Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 5 Apr 2019 09:57:08 -0700 Subject: [PATCH 0736/1861] ASoC: core: conditionally increase module refcount on component open Recently, for Intel platforms the "ignore_module_refcount" field was introduced for the component driver. In order to avoid a deadlock preventing the PCI modules from being removed even when the card was idle, the refcounts were not incremented for the device driver module during component probe. However, this change introduced a nasty side effect: the device driver module can be unloaded while a pcm stream is open. This patch proposes to change the field to be renamed as "module_get_upon_open". When this field is set, the module refcount should be incremented on pcm open amd decremented upon pcm close. This will enable modules to be removed when no PCM playback/capture happens and prevent removal when the component is actually in use. Also, align with the skylake component driver with the new name. 
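The rename is worth restating as a contract, since the flag's effect is split across two call sites. A kernel-style sketch of where the get/put pair lives for each setting; the helper below is a simplification for illustration, not the actual soc-core function:

#include <linux/module.h>
#include <linux/errno.h>
#include <sound/soc.h>

/*
 *                          probe()           pcm open()        pcm close()
 *  flag clear:             try_module_get()  -                 -
 *  module_get_upon_open:   skipped           try_module_get()  module_put()
 */
static int component_probe_ref(const struct snd_soc_component_driver *drv,
			       struct module *owner)
{
	/* With module_get_upon_open set, probe leaves the refcount
	 * alone; the open/close paths (next patch) take and drop it. */
	if (!drv->module_get_upon_open && !try_module_get(owner))
		return -ENODEV;
	return 0;
}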
Fixes: b450b878 ("ASoC: core: don't increase component module refcount unconditionally") Signed-off-by: Ranjani Sridharan Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- include/sound/soc.h | 9 +++++++-- sound/soc/intel/skylake/skl-pcm.c | 2 +- sound/soc/soc-core.c | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 1e2be35ed36f2..482b4ea87c3c4 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -802,8 +802,13 @@ struct snd_soc_component_driver { int probe_order; int remove_order; - /* signal if the module handling the component cannot be removed */ - unsigned int ignore_module_refcount:1; + /* + * signal if the module handling the component should not be removed + * if a pcm is open. Setting this would prevent the module + * refcount being incremented in probe() but allow it be incremented + * when a pcm is opened and decremented when it is closed. + */ + unsigned int module_get_upon_open:1; /* bits */ unsigned int idle_bias_on:1; diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 57031b6d4d45e..9735e24122514 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1475,7 +1475,7 @@ static const struct snd_soc_component_driver skl_component = { .ops = &skl_platform_ops, .pcm_new = skl_pcm_new, .pcm_free = skl_pcm_free, - .ignore_module_refcount = 1, /* do not increase the refcount in core */ + .module_get_upon_open = 1, /* increment refcount when a pcm is opened */ }; int skl_platform_register(struct device *dev) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index d887576597290..46e3ab0fced47 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -947,7 +947,7 @@ static void soc_cleanup_component(struct snd_soc_component *component) snd_soc_dapm_free(snd_soc_component_get_dapm(component)); soc_cleanup_component_debugfs(component); component->card = NULL; - if (!component->driver->ignore_module_refcount) + if (!component->driver->module_get_upon_open) module_put(component->dev->driver->owner); } @@ -1381,7 +1381,7 @@ static int soc_probe_component(struct snd_soc_card *card, return 0; } - if (!component->driver->ignore_module_refcount && + if (!component->driver->module_get_upon_open && !try_module_get(component->dev->driver->owner)) return -ENODEV; -- GitLab From 52034add758e268c39110f33d46e2a9492e82aef Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Fri, 5 Apr 2019 09:57:09 -0700 Subject: [PATCH 0737/1861] ASoC: pcm: update module refcount if module_get_upon_open is set Setting the module_get_upon_open field for a component driver prevents the module refcount from being incremented during component probe(). This could lead to the module being allowed to be unloaded when a pcm stream is open. So, if this field is set, the module's refcount should be incremented during pcm open to prevent module removal when the component is in use. And, the refcount should be decremented upon pcm close.
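One subtlety any implementation of this scheme has to respect is balance across a list of components: if open fails for component N after components 0..N-1 already took module references, those references must be dropped again. A kernel-style sketch of that rollback shape; open_one() and close_one() are hypothetical stand-ins for the per-component callbacks, not real soc-pcm functions:

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <sound/soc.h>

static int open_one(struct snd_soc_component *c);	/* hypothetical */
static void close_one(struct snd_soc_component *c);	/* hypothetical */

static int open_all_components(struct snd_soc_component **comps, int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		bool got = false;

		if (comps[i]->driver->module_get_upon_open) {
			if (!try_module_get(comps[i]->dev->driver->owner)) {
				ret = -ENODEV;
				goto rollback;
			}
			got = true;
		}
		ret = open_one(comps[i]);
		if (ret < 0) {
			if (got)
				module_put(comps[i]->dev->driver->owner);
			goto rollback;
		}
	}
	return 0;

rollback:
	/* undo the components that fully opened before the failure */
	while (--i >= 0) {
		close_one(comps[i]);
		if (comps[i]->driver->module_get_upon_open)
			module_put(comps[i]->dev->driver->owner);
	}
	return ret;
}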
Signed-off-by: Ranjani Sridharan Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 2d5d5cac4ba60..d21247546f7f7 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -463,6 +464,9 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream, continue; component->driver->ops->close(substream); + + if (component->driver->module_get_upon_open) + module_put(component->dev->driver->owner); } return 0; @@ -513,6 +517,10 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) !component->driver->ops->open) continue; + if (component->driver->module_get_upon_open && + !try_module_get(component->dev->driver->owner)) + return -ENODEV; + ret = component->driver->ops->open(substream); if (ret < 0) { dev_err(component->dev, -- GitLab From 1a07a94b47b1f528f39c3e6187b5eaf02efe44ea Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 6 Apr 2019 01:30:48 +0200 Subject: [PATCH 0738/1861] drm/sun4i: tcon top: Fix NULL/invalid pointer dereference in sun8i_tcon_top_un/bind There are two problems here: 1. Not all clk_data->hws[] need to be initialized, depending on various configured quirks. This leads to NULL ptr deref in clk_hw_unregister_gate() in sun8i_tcon_top_unbind() 2. If there is error when registering the clk_data->hws[], err_unregister_gates error path will try to unregister IS_ERR()=true (invalid) pointer. For problem (1) I have this stack trace: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: clk_hw_unregister+0x8/0x18 clk_hw_unregister_gate+0x14/0x28 sun8i_tcon_top_unbind+0x2c/0x60 component_unbind.isra.4+0x2c/0x50 component_bind_all+0x1d4/0x230 sun4i_drv_bind+0xc4/0x1a0 try_to_bring_up_master+0x164/0x1c0 __component_add+0xa0/0x168 component_add+0x10/0x18 sun8i_dw_hdmi_probe+0x18/0x20 platform_drv_probe+0x3c/0x70 really_probe+0xcc/0x278 driver_probe_device+0x34/0xa8 Problem (2) was identified by head scratching. Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190405233048.3823-1-megous@megous.com --- drivers/gpu/drm/sun4i/sun8i_tcon_top.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c index fc36e0c10a374..b1e7c76e9c172 100644 --- a/drivers/gpu/drm/sun4i/sun8i_tcon_top.c +++ b/drivers/gpu/drm/sun4i/sun8i_tcon_top.c @@ -227,7 +227,7 @@ static int sun8i_tcon_top_bind(struct device *dev, struct device *master, err_unregister_gates: for (i = 0; i < CLK_NUM; i++) - if (clk_data->hws[i]) + if (!IS_ERR_OR_NULL(clk_data->hws[i])) clk_hw_unregister_gate(clk_data->hws[i]); clk_disable_unprepare(tcon_top->bus); err_assert_reset: @@ -245,7 +245,8 @@ static void sun8i_tcon_top_unbind(struct device *dev, struct device *master, of_clk_del_provider(dev->of_node); for (i = 0; i < CLK_NUM; i++) - clk_hw_unregister_gate(clk_data->hws[i]); + if (clk_data->hws[i]) + clk_hw_unregister_gate(clk_data->hws[i]); clk_disable_unprepare(tcon_top->bus); reset_control_assert(tcon_top->rst); -- GitLab From 9eca544b1491df90ea7102a7ed14acc3c562d97b Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 28 Mar 2019 11:33:21 +0100 Subject: [PATCH 0739/1861] cpufreq: schedutil: Simplify iowait boosting There is not reason for the minimum iowait boost value in the schedutil cpufreq governor to depend on the available range of CPU frequencies. In fact, that dependency is generally confusing, because it causes the iowait boost to behave somewhat differently on CPUs with the same maximum frequency and different minimum frequencies, for example. For this reason, replace the min field in struct sugov_cpu with a constant and choose its values to be 1/8 of SCHED_CAPACITY_SCALE (for consistency with the intel_pstate driver's internal governor). [Note that policy->cpuinfo.max_freq will not be a constant any more after a subsequent change, so this change is depended on by it.] Link: https://lore.kernel.org/lkml/20190305083202.GU32494@hirez.programming.kicks-ass.net/T/#ee20bdc98b7d89f6110c0d00e5c3ee8c2ced93c3d Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- kernel/sched/cpufreq_schedutil.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5c41ea3674223..b3a878aa593d1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,8 @@ #include #include +#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8) + struct sugov_tunables { struct gov_attr_set attr_set; unsigned int rate_limit_us; @@ -51,7 +53,6 @@ struct sugov_cpu { u64 last_update; unsigned long bw_dl; - unsigned long min; unsigned long max; /* The field below is for single-CPU policies only: */ @@ -291,8 +292,8 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) * * The IO wait boost of a task is disabled after a tick since the last update * of a CPU. If a new IO wait boost is requested after more then a tick, then - * we enable the boost starting from the minimum frequency, which improves - * energy efficiency by ignoring sporadic wakeups from IO. + * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy + * efficiency by ignoring sporadic wakeups from IO. */ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, bool set_iowait_boost) @@ -303,7 +304,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, if (delta_ns <= TICK_NSEC) return false; - sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0; + sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0; sg_cpu->iowait_boost_pending = set_iowait_boost; return true; @@ -317,8 +318,9 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, * * Each time a task wakes up after an IO operation, the CPU utilization can be * boosted to a certain utilization which doubles at each "frequent and - * successive" wakeup from IO, ranging from the utilization of the minimum - * OPP to the utilization of the maximum OPP. + * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization + * of the maximum OPP. + * * To keep doubling, an IO boost has to be requested at least once per tick, * otherwise we restart from the utilization of the minimum OPP. 
*/ @@ -349,7 +351,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, } /* First wakeup after IO: start with minimum boost */ - sg_cpu->iowait_boost = sg_cpu->min; + sg_cpu->iowait_boost = IOWAIT_BOOST_MIN; } /** @@ -389,7 +391,7 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, * No boost pending; reduce the boost value. */ sg_cpu->iowait_boost >>= 1; - if (sg_cpu->iowait_boost < sg_cpu->min) { + if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { sg_cpu->iowait_boost = 0; return util; } @@ -826,9 +828,6 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->min = - (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) / - policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { -- GitLab From 9083e4986124389e2a7c0ffca95630a4983887f0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:19:52 +0100 Subject: [PATCH 0740/1861] cpufreq: intel_pstate: Update max frequency on global turbo changes While the cpuinfo.max_freq value doesn't really matter for intel_pstate in the active mode, in the passive mode it is used by governors as the maximum physical frequency of the CPU and the results of governor computations generally depend on it. Also it is made available to user space via sysfs and it should match the current HW configuration. For this reason, make intel_pstate update cpuinfo.max_freq for all CPUs if it detects a global change of turbo frequency settings from "disable" to "enable" or the other way associated with a _PPC change notification from the platform firmware. Note that policy_is_inactive(), cpufreq_cpu_acquire(), cpufreq_cpu_release(), and cpufreq_set_policy() need to be made available to it for this purpose. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200759 Reported-by: Gabriele Mazzotta Tested-by: Gabriele Mazzotta Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 16 ++++------------ drivers/cpufreq/intel_pstate.c | 35 ++++++++++++++++++++++++++++------ include/linux/cpufreq.h | 10 ++++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d8fc395af7737..f3f79266ab48c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -34,11 +34,6 @@ static LIST_HEAD(cpufreq_policy_list); -static inline bool policy_is_inactive(struct cpufreq_policy *policy) -{ - return cpumask_empty(policy->cpus); -} - /* Macros to iterate over CPU policies */ #define for_each_suitable_policy(__policy, __active) \ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ @@ -254,7 +249,7 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_put); * cpufreq_cpu_release - Unlock a policy and decrement its usage counter. * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). */ -static void cpufreq_cpu_release(struct cpufreq_policy *policy) +void cpufreq_cpu_release(struct cpufreq_policy *policy) { if (WARN_ON(!policy)) return; @@ -278,7 +273,7 @@ static void cpufreq_cpu_release(struct cpufreq_policy *policy) * cpufreq_cpu_release() in order to release its rwsem and balance its usage * counter properly. 
*/ -static struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); @@ -714,9 +709,6 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy); - /** * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ @@ -2274,8 +2266,8 @@ EXPORT_SYMBOL(cpufreq_get_policy); * * The cpuinfo part of @policy is not updated by this function. */ -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy) +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy) { struct cpufreq_governor *old_gov; int ret; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e2191a570adeb..08d1a1e845aae 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,7 +179,7 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. - * @turbo_disabled_s: Saved @turbo_disabled value. + * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -188,7 +188,7 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; - bool turbo_disabled_s; + bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -899,6 +899,28 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void intel_pstate_update_max_freq(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpufreq_policy new_policy; + struct cpudata *cpudata; + + if (!policy) + return; + + cpudata = all_cpu_data[cpu]; + policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + + memcpy(&new_policy, policy, sizeof(*policy)); + new_policy.max = min(policy->user_policy.max, policy->cpuinfo.max_freq); + new_policy.min = min(policy->user_policy.min, new_policy.max); + + cpufreq_set_policy(policy, &new_policy); + + cpufreq_cpu_release(policy); +} + static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); @@ -908,9 +930,10 @@ static void intel_pstate_update_limits(unsigned int cpu) * If turbo has been turned on or off globally, policy limits for * all CPUs need to be updated to reflect that. */ - if (global.turbo_disabled_s != global.turbo_disabled) { - global.turbo_disabled_s = global.turbo_disabled; - intel_pstate_update_policies(); + if (global.turbo_disabled_mf != global.turbo_disabled) { + global.turbo_disabled_mf = global.turbo_disabled; + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(cpu); } else { cpufreq_update_policy(cpu); } @@ -2159,7 +2182,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - global.turbo_disabled_s = global.turbo_disabled; + global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? 
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5005ea40364fc..684caf067003b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -178,6 +178,11 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +static inline bool policy_is_inactive(struct cpufreq_policy *policy) +{ + return cpumask_empty(policy->cpus); +} + static inline bool policy_is_shared(struct cpufreq_policy *policy) { return cpumask_weight(policy->cpus) > 1; @@ -193,7 +198,12 @@ unsigned int cpufreq_quick_get_max(unsigned int cpu); void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); + +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); +void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); -- GitLab From 108ec36b699475001f5af81ff7db624427d14dbe Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 30 Mar 2019 12:20:22 +0100 Subject: [PATCH 0741/1861] drivers/cpufreq: Convert some slow-path static_cpu_has() callers to boot_cpu_has() Using static_cpu_has() is pointless on those paths, convert them to the boot_cpu_has() variant. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd_freq_sensitivity.c | 2 +- drivers/cpufreq/intel_pstate.c | 18 +++++++++--------- drivers/cpufreq/powernow-k8.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 4ac7c3cf34bef..6927a8c0e7480 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -124,7 +124,7 @@ static int __init amd_freq_sensitivity_init(void) PCI_DEVICE_ID_AMD_KERNCZ_SMBUS, NULL); if (!pcidev) { - if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK)) + if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK)) return -ENODEV; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 08d1a1e845aae..840500b457c63 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -527,7 +527,7 @@ static s16 intel_pstate_get_epb(struct cpudata *cpu_data) u64 epb; int ret; - if (!static_cpu_has(X86_FEATURE_EPB)) + if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); @@ -541,7 +541,7 @@ static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data) { s16 epp; - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { /* * When hwp_req_data is 0, means that caller didn't read * MSR_HWP_REQUEST, so need to read and get EPP. 
@@ -566,7 +566,7 @@ static int intel_pstate_set_epb(int cpu, s16 pref) u64 epb; int ret; - if (!static_cpu_has(X86_FEATURE_EPB)) + if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO; ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); @@ -614,7 +614,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) if (epp < 0) return epp; - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == HWP_EPP_PERFORMANCE) return 1; if (epp <= HWP_EPP_BALANCE_PERFORMANCE) @@ -623,7 +623,7 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data) return 3; else return 4; - } else if (static_cpu_has(X86_FEATURE_EPB)) { + } else if (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: * 0x00-0x03 : Performance @@ -651,7 +651,7 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data, mutex_lock(&intel_pstate_limits_lock); - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { u64 value; ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value); @@ -826,7 +826,7 @@ static void intel_pstate_hwp_set(unsigned int cpu) epp = cpu_data->epp_powersave; } update_epp: - if (static_cpu_has(X86_FEATURE_HWP_EPP)) { + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { value &= ~GENMASK_ULL(31, 24); value |= (u64)epp << 24; } else { @@ -851,7 +851,7 @@ static void intel_pstate_hwp_force_min_perf(int cpu) value |= HWP_MIN_PERF(min_perf); /* Set EPP/EPB to min */ - if (static_cpu_has(X86_FEATURE_HWP_EPP)) + if (boot_cpu_has(X86_FEATURE_HWP_EPP)) value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE); else intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE); @@ -1241,7 +1241,7 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ - if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) + if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index fb77b39a4ce36..3c12e03fa3430 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -1178,7 +1178,7 @@ static int powernowk8_init(void) unsigned int i, supported_cpus = 0; int ret; - if (static_cpu_has(X86_FEATURE_HW_PSTATE)) { + if (boot_cpu_has(X86_FEATURE_HW_PSTATE)) { __request_acpi_cpufreq(); return -ENODEV; } -- GitLab From bfdd5a67c8cb02c147c6b012543e84cb1f5759ba Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 29 Mar 2019 19:35:24 +0100 Subject: [PATCH 0742/1861] x86/asm: Clarify static_cpu_has()'s intended use Clarify when one should use static_cpu_has() and when one should use boot_cpu_has(). Requested-by: Nadav Amit Signed-off-by: Borislav Petkov Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190330112022.28888-2-bp@alien8.de --- arch/x86/include/asm/cpufeature.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2fb791a1b479b..6d6d5cc4302bc 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -155,9 +155,12 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #else /* - * Static testing of CPU features. Used the same as boot_cpu_has(). - * These will statically patch the target code for additional - * performance. + * Static testing of CPU features. 
Used the same as boot_cpu_has(). It + * statically patches the target code for additional performance. Use + * static_cpu_has() only in fast paths, where every cycle counts. Which + * means that the boot_cpu_has() variant is already fast enough for the + * majority of cases and you should stick to using it as it is generally + * only two instructions: a RIP-relative MOV and a TEST. */ static __always_inline bool _static_cpu_has(u16 bit) { -- GitLab From b623fa320f8360f049a6f3c3ccc487cb85af4c5b Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:48 +0800 Subject: [PATCH 0743/1861] cpufreq: ap806: fix possible object reference leak The call to of_find_compatible_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/armada-8k-cpufreq.c:187:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 130, but without a corresponding object release within this function. ./drivers/cpufreq/armada-8k-cpufreq.c:191:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 130, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Jason Cooper Cc: Andrew Lunn Cc: Gregory Clement Cc: Sebastian Hesselbarth Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linux-arm-kernel@lists.infradead.org Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-8k-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index b3f4bd647e9b3..988ebc326bdbb 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -132,6 +132,7 @@ static int __init armada_8k_cpufreq_init(void) of_node_put(node); return -ENODEV; } + of_node_put(node); nb_cpus = num_possible_cpus(); freq_tables = kcalloc(nb_cpus, sizeof(*freq_tables), GFP_KERNEL); -- GitLab From ddb64c5db3cc8fb9c1242214d5798b2c2865681c Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:49 +0800 Subject: [PATCH 0744/1861] cpufreq: imx6q: fix possible object reference leak The call to of_node_get returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/imx6q-cpufreq.c:391:4-10: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 348, but without a corresponding object release within this function. ./drivers/cpufreq/imx6q-cpufreq.c:395:3-9: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 348, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: linux-pm@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/imx6q-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index a4ff09f91c8f8..3e17560b1efe3 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -388,11 +388,11 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) ret = imx6ul_opp_check_speed_grading(cpu_dev); if (ret) { if (ret == -EPROBE_DEFER) - return ret; + goto put_node; dev_err(cpu_dev, "failed to read ocotp: %d\n", ret); - return ret; + goto put_node; } } else { imx6q_opp_check_speed_grading(cpu_dev); -- GitLab From 7c468966f05ac9c17bb5948275283d34e6fe0660 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:50 +0800 Subject: [PATCH 0745/1861] cpufreq: kirkwood: fix possible object reference leak The call to of_get_child_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/kirkwood-cpufreq.c:127:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 118, but without a corresponding object release within this function. ./drivers/cpufreq/kirkwood-cpufreq.c:133:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 118, but without a corresponding object release within this function. and also do some cleanup: - of_node_put(np); - np = NULL; ... of_node_put(np); Signed-off-by: Wen Yang Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/kirkwood-cpufreq.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index c2dd43f3f5d8a..8d63a6dc8383c 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -124,13 +124,14 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); if (IS_ERR(priv.cpu_clk)) { dev_err(priv.dev, "Unable to get cpuclk\n"); - return PTR_ERR(priv.cpu_clk); + err = PTR_ERR(priv.cpu_clk); + goto out_node; } err = clk_prepare_enable(priv.cpu_clk); if (err) { dev_err(priv.dev, "Unable to prepare cpuclk\n"); - return err; + goto out_node; } kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; @@ -161,20 +162,22 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) goto out_ddr; } - of_node_put(np); - np = NULL; - err = cpufreq_register_driver(&kirkwood_cpufreq_driver); - if (!err) - return 0; + if (err) { + dev_err(priv.dev, "Failed to register cpufreq driver\n"); + goto out_powersave; + } - dev_err(priv.dev, "Failed to register cpufreq driver\n"); + of_node_put(np); + return 0; +out_powersave: clk_disable_unprepare(priv.powersave_clk); out_ddr: clk_disable_unprepare(priv.ddr_clk); out_cpu: clk_disable_unprepare(priv.cpu_clk); +out_node: of_node_put(np); return err; -- GitLab From ddb07fba1c645791ead16d1eee0639a033fb8cf9 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:51 +0800 Subject: [PATCH 0746/1861] cpufreq: maple: fix possible object reference leak The call to of_cpu_device_node_get returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/maple-cpufreq.c:213:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 177, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/maple-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index d9df89392b843..a05f1342ec028 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -210,7 +210,7 @@ static int __init maple_cpufreq_init(void) */ valp = of_get_property(cpunode, "clock-frequency", NULL); if (!valp) - return -ENODEV; + goto bail_noprops; max_freq = (*valp)/1000; maple_cpu_freqs[0].frequency = max_freq; maple_cpu_freqs[1].frequency = max_freq/2; -- GitLab From a9acc26b75f652f697e02a9febe2ab0da648a571 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:52 +0800 Subject: [PATCH 0747/1861] cpufreq/pasemi: fix possible object reference leak The call to of_get_cpu_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/pasemi-cpufreq.c:212:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 147, but without a corresponding object release within this function. 
./drivers/cpufreq/pasemi-cpufreq.c:220:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 147, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/pasemi-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 75dfbd2a58ea6..c7710c149de85 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -146,6 +146,7 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu = of_get_cpu_node(policy->cpu, NULL); + of_node_put(cpu); if (!cpu) goto out; -- GitLab From 8d10dc28a9ea6e8c02e825dab28699f3c72b02d9 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:53 +0800 Subject: [PATCH 0748/1861] cpufreq: pmac32: fix possible object reference leak The call to of_find_node_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/pmac32-cpufreq.c:557:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 552, but without a corresponding object release within this function. ./drivers/cpufreq/pmac32-cpufreq.c:569:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 552, but without a corresponding object release within this function. ./drivers/cpufreq/pmac32-cpufreq.c:598:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 587, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linux-pm@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/pmac32-cpufreq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 52f0d91d30c17..9b4ce2eb8222c 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -552,6 +552,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); if (!voltage_gpio){ pr_err("missing cpu-vcore-select gpio\n"); return 1; @@ -588,6 +589,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) if (volt_gpio_np) voltage_gpio = read_gpio(volt_gpio_np); + of_node_put(volt_gpio_np); pvr = mfspr(SPRN_PVR); has_cpu_l2lve = !((pvr & 0xf00) == 0x100); -- GitLab From 233298032803f2802fe99892d0de4ab653bfece4 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Apr 2019 09:37:54 +0800 Subject: [PATCH 0749/1861] cpufreq: ppc_cbe: fix possible object reference leak The call to of_get_cpu_node returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/cpufreq/ppc_cbe_cpufreq.c:89:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 76, but without a corresponding object release within this function. 
./drivers/cpufreq/ppc_cbe_cpufreq.c:89:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 76, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/ppc_cbe_cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index 41a0f0be3f9ff..8414c3a4ea08c 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -86,6 +86,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!cbe_get_cpu_pmd_regs(policy->cpu) || !cbe_get_cpu_mic_tm_regs(policy->cpu)) { pr_info("invalid CBE regs pointers for cpufreq\n"); + of_node_put(cpu); return -EINVAL; } -- GitLab From 67e87d43b794a8886b5d075b3e0fdd0c615a595f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 29 Mar 2019 19:52:59 +0100 Subject: [PATCH 0750/1861] x86: Convert some slow-path static_cpu_has() callers to boot_cpu_has() Using static_cpu_has() is pointless on those paths, convert them to the boot_cpu_has() variant. No functional changes. Reported-by: Nadav Amit Signed-off-by: Borislav Petkov Reviewed-by: Rik van Riel Reviewed-by: Juergen Gross # for paravirt Cc: Aubrey Li Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: Joerg Roedel Cc: "Kirill A. Shutemov" Cc: Konrad Rzeszutek Wilk Cc: Thomas Lendacky Cc: linux-edac@vger.kernel.org Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Tony Luck Cc: virtualization@lists.linux-foundation.org Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190330112022.28888-3-bp@alien8.de --- arch/x86/include/asm/fpu/internal.h | 7 +++---- arch/x86/kernel/apic/apic_numachip.c | 2 +- arch/x86/kernel/cpu/aperfmperf.c | 6 +++--- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mce/inject.c | 2 +- arch/x86/kernel/cpu/proc.c | 10 +++++----- arch/x86/kernel/ldt.c | 14 +++++++------- arch/x86/kernel/paravirt.c | 2 +- arch/x86/kernel/process.c | 4 ++-- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/vm86_32.c | 2 +- 11 files changed, 26 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fb04a3ded7ddb..745a19d34f23f 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -253,7 +253,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) WARN_ON(system_state != SYSTEM_BOOTING); - if (static_cpu_has(X86_FEATURE_XSAVES)) + if (boot_cpu_has(X86_FEATURE_XSAVES)) XSTATE_OP(XSAVES, xstate, lmask, hmask, err); else XSTATE_OP(XSAVE, xstate, lmask, hmask, err); @@ -275,7 +275,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) WARN_ON(system_state != SYSTEM_BOOTING); - if (static_cpu_has(X86_FEATURE_XSAVES)) + if (boot_cpu_has(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); @@ -497,8 +497,7 @@ static inline void fpregs_activate(struct fpu *fpu) * - switch_fpu_finish() restores the new state as * necessary. 
*/ -static inline void -switch_fpu_prepare(struct fpu *old_fpu, int cpu) +static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 78778b54f904a..a5464b8b6c464 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -175,7 +175,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) this_cpu_write(cpu_llc_id, node); /* Account for nodes per socket in multi-core-module processors */ - if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { + if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, val); nodes = ((val >> 3) & 7) + 1; } diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 804c49493938b..64d5aec24203f 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -83,7 +83,7 @@ unsigned int aperfmperf_get_khz(int cpu) if (!cpu_khz) return 0; - if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; aperfmperf_snapshot_cpu(cpu, ktime_get(), true); @@ -99,7 +99,7 @@ void arch_freq_prepare_all(void) if (!cpu_khz) return; - if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return; for_each_online_cpu(cpu) @@ -115,7 +115,7 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!cpu_khz) return 0; - if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659a..95a5faf3a6a0f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1668,7 +1668,7 @@ static void setup_getcpu(int cpu) unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); struct desc_struct d = { }; - if (static_cpu_has(X86_FEATURE_RDTSCP)) + if (boot_cpu_has(X86_FEATURE_RDTSCP)) write_rdtscp_aux(cpudata); /* Store CPU and node number in limit. */ diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 8492ef7d90150..3da9a8823e478 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -528,7 +528,7 @@ static void do_inject(void) * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for * Fam10h and later BKDGs. */ - if (static_cpu_has(X86_FEATURE_AMD_DCM) && + if (boot_cpu_has(X86_FEATURE_AMD_DCM) && b == 4 && boot_cpu_data.x86 < 0x17) { toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu)); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 2c8522a39ed5d..cb2e49810d687 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -35,11 +35,11 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) "fpu_exception\t: %s\n" "cpuid level\t: %d\n" "wp\t\t: yes\n", - static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no", - static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no", - static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no", - static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no", - static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no", + boot_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no", + boot_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no", + boot_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no", + boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no", + boot_cpu_has(X86_FEATURE_FPU) ? 
"yes" : "no", c->cpuid_level); } #else diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 6135ae8ce0364..b2463fcb20a81 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -113,7 +113,7 @@ static void do_sanity_check(struct mm_struct *mm, * tables. */ WARN_ON(!had_kernel_mapping); - if (static_cpu_has(X86_FEATURE_PTI)) + if (boot_cpu_has(X86_FEATURE_PTI)) WARN_ON(!had_user_mapping); } else { /* @@ -121,7 +121,7 @@ static void do_sanity_check(struct mm_struct *mm, * Sync the pgd to the usermode tables. */ WARN_ON(had_kernel_mapping); - if (static_cpu_has(X86_FEATURE_PTI)) + if (boot_cpu_has(X86_FEATURE_PTI)) WARN_ON(had_user_mapping); } } @@ -156,7 +156,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm) k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); - if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) + if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) set_pmd(u_pmd, *k_pmd); } @@ -181,7 +181,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm) { pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); - if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) + if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) set_pgd(kernel_to_user_pgdp(pgd), *pgd); } @@ -208,7 +208,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) spinlock_t *ptl; int i, nr_pages; - if (!static_cpu_has(X86_FEATURE_PTI)) + if (!boot_cpu_has(X86_FEATURE_PTI)) return 0; /* @@ -271,7 +271,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) return; /* LDT map/unmap is only required for PTI */ - if (!static_cpu_has(X86_FEATURE_PTI)) + if (!boot_cpu_has(X86_FEATURE_PTI)) return; nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); @@ -311,7 +311,7 @@ static void free_ldt_pgtables(struct mm_struct *mm) unsigned long start = LDT_BASE_ADDR; unsigned long end = LDT_END_ADDR; - if (!static_cpu_has(X86_FEATURE_PTI)) + if (!boot_cpu_has(X86_FEATURE_PTI)) return; tlb_gather_mmu(&tlb, mm, start, end); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c0e0101133f35..7bbaa6baf37f9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -121,7 +121,7 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); void __init native_pv_lock_init(void) { - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) static_branch_disable(&virt_spin_lock_key); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58ac7be52c7a6..16a7113e91c5a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -236,7 +236,7 @@ static int get_cpuid_mode(void) static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled) { - if (!static_cpu_has(X86_FEATURE_CPUID_FAULT)) + if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT)) return -ENODEV; if (cpuid_enabled) @@ -666,7 +666,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) if (c->x86_vendor != X86_VENDOR_INTEL) return 0; - if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR)) + if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR)) return 0; return 1; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 725624b6c0c05..d62ebbc5ec783 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -108,7 +108,7 @@ void __noreturn machine_real_restart(unsigned int type) write_cr3(real_mode_header->trampoline_pgd); /* Exiting long mode will fail if CR4.PCIDE is 
set. */ - if (static_cpu_has(X86_FEATURE_PCID)) + if (boot_cpu_has(X86_FEATURE_PCID)) cr4_clear_bits(X86_CR4_PCIDE); #endif diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index a092b6b40c6b5..6a38717d179c4 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -369,7 +369,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) preempt_disable(); tsk->thread.sp0 += 16; - if (static_cpu_has(X86_FEATURE_SEP)) { + if (boot_cpu_has(X86_FEATURE_SEP)) { tsk->thread.sysenter_cs = 0; refresh_sysenter_cs(&tsk->thread); } -- GitLab From 28e3ace70c3d2ea47a62dffe046011d1b74ee839 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 29 Mar 2019 20:00:38 +0100 Subject: [PATCH 0751/1861] x86/mm: Convert some slow-path static_cpu_has() callers to boot_cpu_has() Using static_cpu_has() is pointless on those paths, convert them to the boot_cpu_has() variant. No functional changes. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190330112022.28888-5-bp@alien8.de --- arch/x86/mm/dump_pagetables.c | 4 ++-- arch/x86/mm/pgtable.c | 4 ++-- arch/x86/mm/pti.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ee8f8ab469417..7b71ac15b2359 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -577,7 +577,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) { #ifdef CONFIG_PAGE_TABLE_ISOLATION - if (user && static_cpu_has(X86_FEATURE_PTI)) + if (user && boot_cpu_has(X86_FEATURE_PTI)) pgd = kernel_to_user_pgdp(pgd); #endif ptdump_walk_pgd_level_core(m, pgd, false, false); @@ -590,7 +590,7 @@ void ptdump_walk_user_pgd_level_checkwx(void) pgd_t *pgd = INIT_PGD; if (!(__supported_pte_mask & _PAGE_NX) || - !static_cpu_has(X86_FEATURE_PTI)) + !boot_cpu_has(X86_FEATURE_PTI)) return; pr_info("x86/mm: Checking user space page tables\n"); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7bd01709a0914..3dbf440d41143 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -190,7 +190,7 @@ static void pgd_dtor(pgd_t *pgd) * when PTI is enabled. We need them to map the per-process LDT into the * user-space page-table. */ -#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ +#define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) #define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS @@ -292,7 +292,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) #ifdef CONFIG_PAGE_TABLE_ISOLATION - if (!static_cpu_has(X86_FEATURE_PTI)) + if (!boot_cpu_has(X86_FEATURE_PTI)) return; pgdp = kernel_to_user_pgdp(pgdp); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 4fee5c3003ed7..8c9a54ebda603 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -626,7 +626,7 @@ void pti_set_kernel_image_nonglobal(void) */ void __init pti_init(void) { - if (!static_cpu_has(X86_FEATURE_PTI)) + if (!boot_cpu_has(X86_FEATURE_PTI)) return; pr_info("enabled\n"); -- GitLab From c1d1090c3c7674c965552e22a65b29423aa4f090 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Apr 2019 15:46:10 +0530 Subject: [PATCH 0752/1861] cpufreq: maple: Remove redundant code from maple_cpufreq_init() The success path and error path both look the same, don't duplicate the code. 
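The simplification rests on the classic single-exit pattern: once the success and failure paths need the same cleanup (here, dropping the node reference), one label can serve both and the duplicated tail disappears. A generic kernel-style sketch of the shape the function ends up with; register_stub() is a placeholder, not the maple driver's registration call:

#include <linux/of.h>
#include <linux/errno.h>

static int register_stub(void);		/* placeholder */

static int __init init_from_cpu_node(void)
{
	struct device_node *np;
	int rc;

	np = of_find_node_by_type(NULL, "cpu");	/* takes a reference */
	if (!np)
		return -ENODEV;

	if (!of_get_property(np, "clock-frequency", NULL)) {
		rc = -ENODEV;
		goto bail;			/* error path */
	}

	rc = register_stub();			/* success path */

bail:
	of_node_put(np);			/* one put serves both paths */
	return rc;
}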
Signed-off-by: Viresh Kumar Acked-by: Rafael J. Wysocki --- drivers/cpufreq/maple-cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index a05f1342ec028..a94355723ef85 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -231,10 +231,6 @@ static int __init maple_cpufreq_init(void) rc = cpufreq_register_driver(&maple_cpufreq_driver); - of_node_put(cpunode); - - return rc; - bail_noprops: of_node_put(cpunode); -- GitLab From fbc9418f099d457b91bf14aef8c4bfc498bb2437 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Apr 2019 23:58:01 +0200 Subject: [PATCH 0753/1861] ACPI: PM: Print debug messages when enabling GPEs for wakeup In sufficiently complicated GPE configurations it is hard to determine which GPE could be the source of system wakeup from a sleep state, so make __acpi_device_wakeup_enable() print that information to the kernel log if debugging is enabled. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 824ae985ad93b..5b50f884712c2 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -728,6 +728,9 @@ static int __acpi_device_wakeup_enable(struct acpi_device *adev, goto out; } + acpi_handle_debug(adev->handle, "GPE%2X enabled for wakeup\n", + (unsigned int)wakeup->gpe_number); + inc: wakeup->enable_count++; -- GitLab From 1120b0f9850cb01fffcb5f4379a69c8ab7a6658f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:03:30 +0200 Subject: [PATCH 0754/1861] cpufreq: intel_pstate: Documentation: Add references sections Add separate references sections to the cpufreq.rst and intel_pstate.rst documents under admin-guide/pm and list the references to external documentation in there. Update the ACPI specification URL while at it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/cpufreq.rst | 10 +++++--- Documentation/admin-guide/pm/intel_pstate.rst | 25 ++++++++++++------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 7eca9026a9ed2..b97ce64d5976a 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -396,8 +396,8 @@ RT or deadline scheduling classes, the governor will increase the frequency to the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn, if it is invoked by the CFS scheduling class, the governor will use the Per-Entity Load Tracking (PELT) metric for the root control group of the -given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_ -LWN.net article for a description of the PELT mechanism). Then, the new +given CPU as the CPU utilization estimate (see the *Per-entity load tracking* +LWN.net article [1]_ for a description of the PELT mechanism). Then, the new CPU frequency to apply is computed in accordance with the formula f = 1.25 * ``f_0`` * ``util`` / ``max`` @@ -698,4 +698,8 @@ hardware feature (e.g. all Intel ones), even if the :c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set. -.. _Per-entity load tracking: https://lwn.net/Articles/531853/ +References +========== + +..
[1] Jonathan Corbet, *Per-entity load tracking*, + https://lwn.net/Articles/531853/ diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index ec0f7c111f65b..6dba90b753d73 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -20,11 +20,10 @@ you have not done that yet.] For the processors supported by ``intel_pstate``, the P-state concept is broader than just an operating frequency or an operating performance point (see the -`LinuxCon Europe 2015 presentation by Kristen Accardi `_ for more +LinuxCon Europe 2015 presentation by Kristen Accardi [1]_ for more information about that). For this reason, the representation of P-states used by ``intel_pstate`` internally follows the hardware specification (for details -refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual -Volume 3: System Programming Guide `_). However, the ``CPUFreq`` core +refer to Intel Software Developer’s Manual [2]_). However, the ``CPUFreq`` core uses frequencies for identifying operating performance points of CPUs and frequencies are involved in the user space interface exposed by it, so ``intel_pstate`` maps its internal representation of P-states to frequencies too @@ -561,9 +560,9 @@ or to pin every task potentially sensitive to them to a specific CPU.] On the majority of systems supported by ``intel_pstate``, the ACPI tables provided by the platform firmware contain ``_PSS`` objects returning information -that can be used for CPU performance scaling (refer to the `ACPI specification`_ -for details on the ``_PSS`` objects and the format of the information returned -by them). +that can be used for CPU performance scaling (refer to the ACPI specification +[3]_ for details on the ``_PSS`` objects and the format of the information +returned by them). The information returned by the ACPI ``_PSS`` objects is used by the ``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate`` @@ -728,6 +727,14 @@ P-state is called, the ``ftrace`` filter can be set to to -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func -.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf -.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html -.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf +References +========== + +.. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*, + http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf + +.. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*, + http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html + +.. [3] *Advanced Configuration and Power Interface Specification*, + https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf -- GitLab From fc7db767b16cf2af3be9e87a4b88e206d0e1a8b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:04:43 +0200 Subject: [PATCH 0755/1861] Documentation: PM: Add SPDX license tags to multiple files Add SPDX license tags to .rst files under Documentation/driver-api/pm and Documentation/admin-guide/pm. Signed-off-by: Rafael J.
Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/cpufreq.rst | 2 ++ Documentation/admin-guide/pm/cpuidle.rst | 2 ++ Documentation/admin-guide/pm/index.rst | 2 ++ Documentation/admin-guide/pm/intel_pstate.rst | 2 ++ Documentation/admin-guide/pm/sleep-states.rst | 2 ++ Documentation/admin-guide/pm/strategies.rst | 2 ++ Documentation/admin-guide/pm/system-wide.rst | 2 ++ Documentation/admin-guide/pm/working-state.rst | 2 ++ Documentation/driver-api/pm/cpuidle.rst | 2 ++ Documentation/driver-api/pm/devices.rst | 2 ++ Documentation/driver-api/pm/index.rst | 2 ++ Documentation/driver-api/pm/notifiers.rst | 2 ++ Documentation/driver-api/pm/types.rst | 2 ++ 13 files changed, 26 insertions(+) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index b97ce64d5976a..63e54ba1fe0c4 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy ` .. |intel_pstate| replace:: :doc:`intel_pstate ` diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index 9c58b35a81cbc..ab8fa08560959 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state ` .. |cpufreq| replace:: :doc:`CPU Performance Scaling ` diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst index 49237ac734428..39f8f9f81e7a3 100644 --- a/Documentation/admin-guide/pm/index.rst +++ b/Documentation/admin-guide/pm/index.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================ Power Management ================ diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 6dba90b753d73..5883c6c803156 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =============================================== ``intel_pstate`` CPU Performance Scaling Driver =============================================== diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index dbf5acd49f350..6d3537ae9a947 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =================== System Sleep States =================== diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst index afe4d3f831fe0..863172633fcad 100644 --- a/Documentation/admin-guide/pm/strategies.rst +++ b/Documentation/admin-guide/pm/strategies.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =========================== Power Management Strategies =========================== diff --git a/Documentation/admin-guide/pm/system-wide.rst b/Documentation/admin-guide/pm/system-wide.rst index 0c81e4c5de398..2b1f987b34f00 100644 --- a/Documentation/admin-guide/pm/system-wide.rst +++ b/Documentation/admin-guide/pm/system-wide.rst @@ -1,3 +1,5 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + ============================ System-Wide Power Management ============================ diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index b6cef9b5e961e..e5fbf322f2562 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================== Working-State Power Management ============================== diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst index 5842ab621a58c..7e0d850e01137 100644 --- a/Documentation/driver-api/pm/cpuidle.rst +++ b/Documentation/driver-api/pm/cpuidle.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor ` .. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device ` .. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver ` diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 090c151aa86bd..6885d72d85ca2 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + .. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops ` .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain ` .. |struct bus_type| replace:: :c:type:`struct bus_type ` diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst index 56975c6bc7895..c2a9ef8d115ce 100644 --- a/Documentation/driver-api/pm/index.rst +++ b/Documentation/driver-api/pm/index.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =============================== CPU and Device Power Management =============================== diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst index 62f860026992d..69678a0d9cc08 100644 --- a/Documentation/driver-api/pm/notifiers.rst +++ b/Documentation/driver-api/pm/notifiers.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ============================= Suspend/Hibernation Notifiers ============================= diff --git a/Documentation/driver-api/pm/types.rst b/Documentation/driver-api/pm/types.rst index 3ebdecc541043..73a231caf7643 100644 --- a/Documentation/driver-api/pm/types.rst +++ b/Documentation/driver-api/pm/types.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================================== Device Power Management Data Types ================================== -- GitLab From fc1860d6b17fa00d16df5e608eed0526e11ccad1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:06:15 +0200 Subject: [PATCH 0756/1861] Documentation: PM: Unify copyright notices Unify copyright notices in the .rst files under Documentation/driver-api/pm and Documentation/admin-guide/pm. Signed-off-by: Rafael J.
Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/cpufreq.rst | 6 ++++-- Documentation/admin-guide/pm/cpuidle.rst | 6 ++++-- Documentation/admin-guide/pm/intel_pstate.rst | 5 +++-- Documentation/admin-guide/pm/sleep-states.rst | 6 ++++-- Documentation/admin-guide/pm/strategies.rst | 6 ++++-- Documentation/driver-api/pm/cpuidle.rst | 5 +++-- Documentation/driver-api/pm/devices.rst | 10 ++++++---- Documentation/driver-api/pm/notifiers.rst | 6 ++++-- 8 files changed, 32 insertions(+), 18 deletions(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 63e54ba1fe0c4..0c74a77849648 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy ` .. |intel_pstate| replace:: :doc:`intel_pstate ` @@ -7,9 +8,10 @@ CPU Performance Scaling ======================= -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki The Concept of CPU Performance Scaling ====================================== diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index ab8fa08560959..e70b365dbc603 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state ` .. |cpufreq| replace:: :doc:`CPU Performance Scaling ` @@ -7,9 +8,10 @@ CPU Idle Time Management ======================== -:: +:Copyright: |copy| 2018 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2018 Intel Corp., Rafael J. Wysocki Concepts ======== diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 5883c6c803156..67e414e34f379 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -1,12 +1,13 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: =============================================== ``intel_pstate`` CPU Performance Scaling Driver =============================================== -:: +:Copyright: |copy| 2017 Intel Corporation - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki +:Author: Rafael J. Wysocki General Information diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index 6d3537ae9a947..cd3a28cb81f42 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -1,12 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: =================== System Sleep States =================== -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki Sleep states are global low-power states of the entire system in which user space code cannot be executed and the overall system activity is significantly diff --git a/Documentation/admin-guide/pm/strategies.rst b/Documentation/admin-guide/pm/strategies.rst index 863172633fcad..dd0362e32fa55 100644 --- a/Documentation/admin-guide/pm/strategies.rst +++ b/Documentation/admin-guide/pm/strategies.rst @@ -1,12 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. 
include:: =========================== Power Management Strategies =========================== -:: +:Copyright: |copy| 2017 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2017 Intel Corp., Rafael J. Wysocki The Linux kernel supports two major high-level power management strategies. diff --git a/Documentation/driver-api/pm/cpuidle.rst b/Documentation/driver-api/pm/cpuidle.rst index 7e0d850e01137..006cf6db40c6d 100644 --- a/Documentation/driver-api/pm/cpuidle.rst +++ b/Documentation/driver-api/pm/cpuidle.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor ` .. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device ` @@ -9,9 +10,9 @@ CPU Idle Time Management ======================== -:: +:Copyright: |copy| 2019 Intel Corporation - Copyright (c) 2019 Intel Corp., Rafael J. Wysocki +:Author: Rafael J. Wysocki CPU Idle Time Management Subsystem diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 6885d72d85ca2..30835683616a6 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -1,4 +1,5 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: .. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops ` .. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain ` @@ -14,11 +15,12 @@ Device Power Management Basics ============================== -:: +:Copyright: |copy| 2010-2011 Rafael J. Wysocki , Novell Inc. +:Copyright: |copy| 2010 Alan Stern +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2010-2011 Rafael J. Wysocki , Novell Inc. - Copyright (c) 2010 Alan Stern - Copyright (c) 2016 Intel Corp., Rafael J. Wysocki Most of the code in Linux is device drivers, so most of the Linux power management (PM) code is also driver-specific. Most drivers will do very diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst index 69678a0d9cc08..186435c43b77e 100644 --- a/Documentation/driver-api/pm/notifiers.rst +++ b/Documentation/driver-api/pm/notifiers.rst @@ -1,12 +1,14 @@ .. SPDX-License-Identifier: GPL-2.0 +.. include:: ============================= Suspend/Hibernation Notifiers ============================= -:: +:Copyright: |copy| 2016 Intel Corporation + +:Author: Rafael J. Wysocki - Copyright (c) 2016 Intel Corp., Rafael J. Wysocki There are some operations that subsystems or drivers may want to carry out before hibernation/suspend or after restore/resume, but they require the system -- GitLab From 7973b799dbea1770742851487a98276a24c961a5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Apr 2019 00:08:18 +0200 Subject: [PATCH 0757/1861] admin-guide: pm: intel_epb: Add SPDX license tag and copyright notice Add an SPDX license tag and a copyright notice to the intel_epb.rst file under Documentation/admin-guide/pm. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- Documentation/admin-guide/pm/intel_epb.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst index d100849edfc4e..005121167af7a 100644 --- a/Documentation/admin-guide/pm/intel_epb.rst +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -1,7 +1,15 @@ +.. SPDX-License-Identifier: GPL-2.0 +..
include:: + ====================================== Intel Performance and Energy Bias Hint ====================================== +:Copyright: |copy| 2019 Intel Corporation + +:Author: Rafael J. Wysocki + + .. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c :doc: overview -- GitLab From 4614bbdee35706ed77c130d23f12e21479b670ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 11 Feb 2019 15:24:56 +0000 Subject: [PATCH 0758/1861] docs/memory-barriers.txt: Rewrite "KERNEL I/O BARRIER EFFECTS" section The "KERNEL I/O BARRIER EFFECTS" section of memory-barriers.txt is vague, x86-centric, out-of-date, incomplete and demonstrably incorrect in places. This is largely because I/O ordering is a horrible can of worms, but also because the document has stagnated as our understanding has evolved. Attempt to address some of that, by rewriting the section based on recent(-ish) discussions with Arnd, BenH and others. Maybe one day we'll find a way to formalise this stuff, but for now let's at least try to make the English easier to understand. Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Andrea Parri Cc: Palmer Dabbelt Cc: Daniel Lustig Cc: David Howells Cc: Alan Stern Cc: "Maciej W. Rozycki" Cc: Mikulas Patocka Acked-by: Linus Torvalds Reviewed-by: Paul E. McKenney Signed-off-by: Will Deacon --- Documentation/memory-barriers.txt | 115 ++++++++++++++++++------------ 1 file changed, 70 insertions(+), 45 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 1c22b21ae922f..5eb6f4c6a1333 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -2599,72 +2599,97 @@ likely, then interrupt-disabling locks should be used to guarantee ordering. KERNEL I/O BARRIER EFFECTS ========================== -When accessing I/O memory, drivers should use the appropriate accessor -functions: +Interfacing with peripherals via I/O accesses is deeply architecture and device +specific. Therefore, drivers which are inherently non-portable may rely on +specific behaviours of their target systems in order to achieve synchronization +in the most lightweight manner possible. For drivers intending to be portable +between multiple architectures and bus implementations, the kernel offers a +series of accessor functions that provide various degrees of ordering +guarantees: - (*) inX(), outX(): + (*) readX(), writeX(): - These are intended to talk to I/O space rather than memory space, but - that's primarily a CPU-specific concept. The i386 and x86_64 processors - do indeed have special I/O space access cycles and instructions, but many - CPUs don't have such a concept. + The readX() and writeX() MMIO accessors take a pointer to the peripheral + being accessed as an __iomem * parameter. For pointers mapped with the + default I/O attributes (e.g. those returned by ioremap()), then the + ordering guarantees are as follows: - The PCI bus, amongst others, defines an I/O space concept which - on such - CPUs as i386 and x86_64 - readily maps to the CPU's concept of I/O - space. However, it may also be mapped as a virtual I/O space in the CPU's - memory map, particularly on those CPUs that don't support alternate I/O - spaces. + 1. All readX() and writeX() accesses to the same peripheral are ordered + with respect to each other. For example, this ensures that MMIO register + writes by the CPU to a particular device will arrive in program order. 
- Accesses to this space may be fully synchronous (as on i386), but - intermediary bridges (such as the PCI host bridge) may not fully honour - that. + 2. A writeX() by the CPU to the peripheral will first wait for the + completion of all prior CPU writes to memory. For example, this ensures + that writes by the CPU to an outbound DMA buffer allocated by + dma_alloc_coherent() will be visible to a DMA engine when the CPU writes + to its MMIO control register to trigger the transfer. - They are guaranteed to be fully ordered with respect to each other. + 3. A readX() by the CPU from the peripheral will complete before any + subsequent CPU reads from memory can begin. For example, this ensures + that reads by the CPU from an incoming DMA buffer allocated by + dma_alloc_coherent() will not see stale data after reading from the DMA + engine's MMIO status register to establish that the DMA transfer has + completed. - They are not guaranteed to be fully ordered with respect to other types of - memory and I/O operation. + 4. A readX() by the CPU from the peripheral will complete before any + subsequent delay() loop can begin execution. For example, this ensures + that two MMIO register writes by the CPU to a peripheral will arrive at + least 1us apart if the first write is immediately read back with readX() + and udelay(1) is called prior to the second writeX(). - (*) readX(), writeX(): + __iomem pointers obtained with non-default attributes (e.g. those returned + by ioremap_wc()) are unlikely to provide many of these guarantees. - Whether these are guaranteed to be fully ordered and uncombined with - respect to each other on the issuing CPU depends on the characteristics - defined for the memory window through which they're accessing. On later - i386 architecture machines, for example, this is controlled by way of the - MTRR registers. + (*) readX_relaxed(), writeX_relaxed(): - Ordinarily, these will be guaranteed to be fully ordered and uncombined, - provided they're not accessing a prefetchable device. + These are similar to readX() and writeX(), but provide weaker memory + ordering guarantees. Specifically, they do not guarantee ordering with + respect to normal memory accesses or delay() loops (i.e bullets 2-4 above) + but they are still guaranteed to be ordered with respect to other accesses + to the same peripheral when operating on __iomem pointers mapped with the + default I/O attributes. - However, intermediary hardware (such as a PCI bridge) may indulge in - deferral if it so wishes; to flush a store, a load from the same location - is preferred[*], but a load from the same device or from configuration - space should suffice for PCI. + (*) readsX(), writesX(): - [*] NOTE! attempting to load from the same location as was written to may - cause a malfunction - consider the 16550 Rx/Tx serial registers for - example. + The readsX() and writesX() MMIO accessors are designed for accessing + register-based, memory-mapped FIFOs residing on peripherals that are not + capable of performing DMA. Consequently, they provide only the ordering + guarantees of readX_relaxed() and writeX_relaxed(), as documented above. - Used with prefetchable I/O memory, an mmiowb() barrier may be required to - force stores to be ordered. + (*) inX(), outX(): - Please refer to the PCI specification for more information on interactions - between PCI transactions. 
+ The inX() and outX() accessors are intended to access legacy port-mapped + I/O peripherals, which may require special instructions on some + architectures (notably x86). The port number of the peripheral being + accessed is passed as an argument. - (*) readX_relaxed(), writeX_relaxed() + Since many CPU architectures ultimately access these peripherals via an + internal virtual memory mapping, the portable ordering guarantees provided + by inX() and outX() are the same as those provided by readX() and writeX() + respectively when accessing a mapping with the default I/O attributes. - These are similar to readX() and writeX(), but provide weaker memory - ordering guarantees. Specifically, they do not guarantee ordering with - respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee - ordering with respect to LOCK or UNLOCK operations. If the latter is - required, an mmiowb() barrier can be used. Note that relaxed accesses to - the same peripheral are guaranteed to be ordered with respect to each - other. + Device drivers may expect outX() to emit a non-posted write transaction + that waits for a completion response from the I/O peripheral before + returning. This is not guaranteed by all architectures and is therefore + not part of the portable ordering semantics. + + (*) insX(), outsX(): + + As above, the insX() and outsX() accessors provide the same ordering + guarantees as readsX() and writesX() respectively when accessing a mapping + with the default I/O attributes. (*) ioreadX(), iowriteX() These will perform appropriately for the type of access they're actually doing, be it inX()/outX() or readX()/writeX(). +All of these accessors assume that the underlying peripheral is little-endian, +and will therefore perform byte-swapping operations on big-endian architectures. + +Composing I/O ordering barriers with SMP ordering barriers and LOCK/UNLOCK +operations is a dangerous sport which may require the use of mmiowb(). See the +subsection "Acquires vs I/O accesses" for more information. ======================================== ASSUMED MINIMUM EXECUTION ORDERING MODEL -- GitLab From d1be6a28b13ce6d1bc42bf9b6a9454c65839225b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:48:44 +0000 Subject: [PATCH 0759/1861] asm-generic/mmiowb: Add generic implementation of mmiowb() tracking In preparation for removing all explicit mmiowb() calls from driver code, implement a tracking system in asm-generic based loosely on the PowerPC implementation. This allows architectures with a non-empty mmiowb() definition to have the barrier automatically inserted in spin_unlock() following a critical section containing an I/O write. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- include/asm-generic/mmiowb.h | 63 ++++++++++++++++++++++++++++++ include/asm-generic/mmiowb_types.h | 12 ++++++ kernel/Kconfig.locks | 7 ++++ kernel/locking/spinlock.c | 7 ++++ 4 files changed, 89 insertions(+) create mode 100644 include/asm-generic/mmiowb.h create mode 100644 include/asm-generic/mmiowb_types.h diff --git a/include/asm-generic/mmiowb.h b/include/asm-generic/mmiowb.h new file mode 100644 index 0000000000000..9439ff037b2d1 --- /dev/null +++ b/include/asm-generic/mmiowb.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_MMIOWB_H +#define __ASM_GENERIC_MMIOWB_H + +/* + * Generic implementation of mmiowb() tracking for spinlocks. 
+ * + * If your architecture doesn't ensure that writes to an I/O peripheral + * within two spinlocked sections on two different CPUs are seen by the + * peripheral in the order corresponding to the lock handover, then you + * need to follow these FIVE easy steps: + * + * 1. Implement mmiowb() (and arch_mmiowb_state() if you're fancy) + * in asm/mmiowb.h, then #include this file + * 2. Ensure your I/O write accessors call mmiowb_set_pending() + * 3. Select ARCH_HAS_MMIOWB + * 4. Untangle the resulting mess of header files + * 5. Complain to your architects + */ +#ifdef CONFIG_MMIOWB + +#include +#include + +#ifndef arch_mmiowb_state +#include +#include + +DECLARE_PER_CPU(struct mmiowb_state, __mmiowb_state); +#define __mmiowb_state() this_cpu_ptr(&__mmiowb_state) +#else +#define __mmiowb_state() arch_mmiowb_state() +#endif /* arch_mmiowb_state */ + +static inline void mmiowb_set_pending(void) +{ + struct mmiowb_state *ms = __mmiowb_state(); + ms->mmiowb_pending = ms->nesting_count; +} + +static inline void mmiowb_spin_lock(void) +{ + struct mmiowb_state *ms = __mmiowb_state(); + ms->nesting_count++; +} + +static inline void mmiowb_spin_unlock(void) +{ + struct mmiowb_state *ms = __mmiowb_state(); + + if (unlikely(ms->mmiowb_pending)) { + ms->mmiowb_pending = 0; + mmiowb(); + } + + ms->nesting_count--; +} +#else +#define mmiowb_set_pending() do { } while (0) +#define mmiowb_spin_lock() do { } while (0) +#define mmiowb_spin_unlock() do { } while (0) +#endif /* CONFIG_MMIOWB */ +#endif /* __ASM_GENERIC_MMIOWB_H */ diff --git a/include/asm-generic/mmiowb_types.h b/include/asm-generic/mmiowb_types.h new file mode 100644 index 0000000000000..8eb0095655e78 --- /dev/null +++ b/include/asm-generic/mmiowb_types.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_GENERIC_MMIOWB_TYPES_H +#define __ASM_GENERIC_MMIOWB_TYPES_H + +#include + +struct mmiowb_state { + u16 nesting_count; + u16 mmiowb_pending; +}; + +#endif /* __ASM_GENERIC_MMIOWB_TYPES_H */ diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index fbba478ae5229..6ba2570eddad1 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -251,3 +251,10 @@ config ARCH_USE_QUEUED_RWLOCKS config QUEUED_RWLOCKS def_bool y if ARCH_USE_QUEUED_RWLOCKS depends on SMP + +config ARCH_HAS_MMIOWB + bool + +config MMIOWB + def_bool y if ARCH_HAS_MMIOWB + depends on SMP diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 936f3d14dd6bf..0ff08380f5318 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -22,6 +22,13 @@ #include #include +#ifdef CONFIG_MMIOWB +#ifndef arch_mmiowb_state +DEFINE_PER_CPU(struct mmiowb_state, __mmiowb_state); +EXPORT_PER_CPU_SYMBOL(__mmiowb_state); +#endif +#endif + /* * If lockdep is enabled then we use the non-preemption spin-ops * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are -- GitLab From fdcd06a8ab775cbe716ff893372bed580e4c8a1c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:49:41 +0000 Subject: [PATCH 0760/1861] arch: Use asm-generic header for asm/mmiowb.h Hook up asm-generic/mmiowb.h to Kbuild for all architectures so that we can subsequently include asm/mmiowb.h from core code. 
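As background for the long run of Kbuild one-liners that follow: a generic-y entry does not copy the header anywhere; it tells Kbuild to emit a one-line generated wrapper so that #include <asm/mmiowb.h> resolves to the asm-generic version. Roughly the following, though the generated path varies by architecture and build setup:

/*
 * arch/$(ARCH)/include/generated/asm/mmiowb.h -- produced at build
 * time by the 'generic-y += mmiowb.h' entry in the arch Kbuild file.
 */
#include <asm-generic/mmiowb.h>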
Cc: Masahiro Yamada Cc: Arnd Bergmann Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/csky/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/nds32/include/asm/Kbuild | 1 + arch/nios2/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/riscv/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + 25 files changed, 25 insertions(+) diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 70b783333965e..89e87bbc987fa 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index decc306a3b52c..393d4f5e14503 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += msi.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index a8a4eb7f6dae0..a3fc0a230a68a 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += kdebug.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += msi.h generic-y += parport.h generic-y += preempt.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 1e17ea5c372b2..3dae4fd028cf4 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += msi.h generic-y += qrwlock.h generic-y += qspinlock.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 249c9f6f26dce..6b168d32fbffe 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += mmu.h generic-y += mmu_context.h generic-y += pci.h diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 2a0abe8f2a352..95f4e550db8a1 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -28,6 +28,7 @@ generic-y += linkage.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += module.h generic-y += mutex.h generic-y += pci.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index e3dead402e5fb..123d8f54be4a5 100644 --- a/arch/h8300/include/asm/Kbuild +++ 
b/arch/h8300/include/asm/Kbuild @@ -29,6 +29,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += mmu.h generic-y += mmu_context.h generic-y += module.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index d046e8ccdf786..d53704d561e6a 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 11f191689c9e8..cabfe0280c33d 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 2c359d9e80f63..0ddae4a74adb2 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += percpu.h generic-y += preempt.h generic-y += sections.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 1a8285c3f6939..17a8d0a62038b 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 87b86cdf126a9..bf39c2253ec8d 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += msi.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index 64ceff7ab99b7..688b6ed262278 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -31,6 +31,7 @@ generic-y += limits.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += parport.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 88a667d12aaa9..d7ef3512504a6 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -27,6 +27,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += module.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 22aa97136c019..1919cc5e0f11d 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += module.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 
9bcd0c903dbbe..b8c7db777144c 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += percpu.h generic-y += preempt.h generic-y += seccomp.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index a0c132bedfae8..74b6605ca55fc 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -7,6 +7,7 @@ generic-y += export.h generic-y += irq_regs.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mmiowb.h generic-y += preempt.h generic-y += rwsem.h generic-y += vtime.h diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index cccd12cf27d44..221cd2ec78a4a 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -21,6 +21,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += mutex.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 12d77cb11fe5a..bdc4f06a04c50 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += rwsem.h generic-y += trace_clock.h generic-y += unaligned.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7bf2cb680d328..162c9054561f5 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index a22cfd5c0ee86..468440db66575 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += module.h generic-y += msi.h generic-y += preempt.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 00bcbe2326d9e..b506ad06aefc8 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index d77d953c04c1c..b301a0b3c0b2b 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += module.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a0ab9ab61c754..eebd05942e6ca 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -11,3 +11,4 @@ generic-y += early_ioremap.h generic-y += export.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 3843198e03d4b..794e461785e16 100644 --- 
a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += mmiowb.h generic-y += param.h generic-y += percpu.h generic-y += preempt.h -- GitLab From 60ca1e5a200cd294a12907fa36dece4241db4ab8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:59:59 +0000 Subject: [PATCH 0761/1861] mmiowb: Hook up mmiowb helpers to spinlocks and generic I/O accessors Removing explicit calls to mmiowb() from driver code means that we must now call into the generic mmiowb_spin_{lock,unlock}() functions from the core spinlock code. In order to elide barriers following critical sections without any I/O writes, we also hook into the asm-generic I/O routines. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- include/asm-generic/io.h | 3 ++- include/linux/spinlock.h | 11 ++++++++++- kernel/locking/spinlock_debug.c | 6 +++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 303871651f8aa..bc490a7466021 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -19,6 +19,7 @@ #include #endif +#include #include #ifndef mmiowb @@ -49,7 +50,7 @@ /* serialize device access against a spin_unlock, usually handled there. */ #ifndef __io_aw -#define __io_aw() barrier() +#define __io_aw() mmiowb_set_pending() #endif #ifndef __io_pbw diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index e089157dcf97c..ed7c4d6b8235f 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -57,6 +57,7 @@ #include #include #include +#include /* @@ -178,6 +179,7 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) { __acquire(lock); arch_spin_lock(&lock->raw_lock); + mmiowb_spin_lock(); } #ifndef arch_spin_lock_flags @@ -189,15 +191,22 @@ do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lo { __acquire(lock); arch_spin_lock_flags(&lock->raw_lock, *flags); + mmiowb_spin_lock(); } static inline int do_raw_spin_trylock(raw_spinlock_t *lock) { - return arch_spin_trylock(&(lock)->raw_lock); + int ret = arch_spin_trylock(&(lock)->raw_lock); + + if (ret) + mmiowb_spin_lock(); + + return ret; } static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) { + mmiowb_spin_unlock(); arch_spin_unlock(&lock->raw_lock); __release(lock); } diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 9aa0fccd5d432..399669f7eba8e 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -111,6 +111,7 @@ void do_raw_spin_lock(raw_spinlock_t *lock) { debug_spin_lock_before(lock); arch_spin_lock(&lock->raw_lock); + mmiowb_spin_lock(); debug_spin_lock_after(lock); } @@ -118,8 +119,10 @@ int do_raw_spin_trylock(raw_spinlock_t *lock) { int ret = arch_spin_trylock(&lock->raw_lock); - if (ret) + if (ret) { + mmiowb_spin_lock(); debug_spin_lock_after(lock); + } #ifndef CONFIG_SMP /* * Must not happen on UP: @@ -131,6 +134,7 @@ int do_raw_spin_trylock(raw_spinlock_t *lock) void do_raw_spin_unlock(raw_spinlock_t *lock) { + mmiowb_spin_unlock(); debug_spin_unlock(lock); arch_spin_unlock(&lock->raw_lock); } -- GitLab From 7fdae81dd415b14b601952163f7c09c67e9f4e81 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:03:18 +0000 Subject: [PATCH 0762/1861] ARM/io: Remove useless definition of mmiowb() ARM includes asm-generic/io.h, which provides a dummy definition of 
mmiowb() if one isn't already provided by the architecture. Remove the useless definition. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/arm/include/asm/io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 6b51826ab3d10..7e22c81398c42 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -281,8 +281,6 @@ extern void _memcpy_fromio(void *, const volatile void __iomem *, size_t); extern void _memcpy_toio(volatile void __iomem *, const void *, size_t); extern void _memset_io(volatile void __iomem *, int, size_t); -#define mmiowb() - /* * Memory access primitives * ------------------------ -- GitLab From d51575621f0fd6c6dd62f7b29a0309425681b813 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:03:18 +0000 Subject: [PATCH 0763/1861] arm64/io: Remove useless definition of mmiowb() arm64 includes asm-generic/io.h, which provides a dummy definition of mmiowb() if one isn't already provided by the architecture. Remove the useless definition. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/arm64/include/asm/io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 8bb7210ac286c..b807cb9b517d3 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -124,8 +124,6 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #define __io_par(v) __iormb(v) #define __iowmb() wmb() -#define mmiowb() do { } while (0) - /* * Relaxed I/O memory access primitives. These follow the Device memory * ordering rules but do not guarantee any ordering relative to Normal memory -- GitLab From 08f1f3a72f4cea136686585b81a251baa3539f12 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:03:18 +0000 Subject: [PATCH 0764/1861] x86/io: Remove useless definition of mmiowb() x86 maps mmiowb() to barrier(), but this is superfluous because a compiler barrier is already implied by spin_unlock(). Since x86 also includes asm-generic/io.h in its asm/io.h file, remove the definition entirely and pick up the dummy definition from core code. Acked-by: Linus Torvalds Reviewed-by: Thomas Gleixner Signed-off-by: Will Deacon --- arch/x86/include/asm/io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 686247db3106f..a06a9f8294ea5 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -90,8 +90,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define __raw_writew __writew #define __raw_writel __writel -#define mmiowb() barrier() - #ifdef CONFIG_X86_64 build_mmio_read(readq, "q", u64, "=r", :"memory") -- GitLab From 335b5c638bfd72b959b22c80f65dea3f614b6cca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:07:37 +0000 Subject: [PATCH 0765/1861] nds32/io: Remove useless definition of mmiowb() mmiowb() only makes sense for SMP platforms, so remove it entirely for nds32. 
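For context, removals like this one lean on the guard visible in the asm-generic/io.h hunk earlier in the series: an architecture that stops defining mmiowb() silently picks up a no-op fallback along the lines of the sketch below (paraphrased, not an exact quote of the header).

/*
 * asm-generic fallback: only takes effect when the architecture has
 * not provided its own mmiowb() before asm-generic/io.h is included.
 */
#ifndef mmiowb
#define mmiowb()	do { } while (0)
#endif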
Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/nds32/include/asm/io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h index 71cd226d6863e..5ef8ae5ba8336 100644 --- a/arch/nds32/include/asm/io.h +++ b/arch/nds32/include/asm/io.h @@ -55,8 +55,6 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) #define __iormb() rmb() #define __iowmb() wmb() -#define mmiowb() __asm__ __volatile__ ("msync all" : : : "memory"); - /* * {read,write}{b,w,l,q}_relaxed() are like the regular version, but * are not guaranteed to provide ordering against spinlocks or memory -- GitLab From 0f43ca692dcb55108ea9a59c11a1a0e359dba367 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:03:18 +0000 Subject: [PATCH 0766/1861] m68k/io: Remove useless definition of mmiowb() m68k includes asm-generic/io.h, which provides a dummy definition of mmiowb() if one isn't already provided by the architecture. Remove the useless definition. Acked-by: Geert Uytterhoeven Reviewed-by: Geert Uytterhoeven Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/m68k/include/asm/io_mm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index 782b78f8a0489..6c03ca5bc4365 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -377,8 +377,6 @@ static inline void isa_delay(void) #define writesw(port, buf, nr) raw_outsw((port), (u16 *)(buf), (nr)) #define writesl(port, buf, nr) raw_outsl((port), (u32 *)(buf), (nr)) -#define mmiowb() - #ifndef CONFIG_SUN3 #define IO_SPACE_LIMIT 0xffff #else -- GitLab From e9e8543fecd2e1ca53616ba82fbd55a25cd2ab8a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:37:21 +0000 Subject: [PATCH 0767/1861] sh/mmiowb: Add unconditional mmiowb() to arch_spin_unlock() The mmiowb() macro is horribly difficult to use and drivers will continue to work most of the time if they omit a call when it is required. Rather than rely on driver authors getting this right, push mmiowb() into arch_spin_unlock() for sh. If this is deemed to be a performance issue, a subsequent optimisation could make use of ARCH_HAS_MMIOWB to elide the barrier in cases where no I/O writes were performed inside the critical section. 
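The elision hinted at here is what the generic mmiowb_spin_unlock() from earlier in the series already implements; a rough sketch of the optimised unlock follows, with a hypothetical lock word rather than the real sh spinlock layout:

/*
 * Sketch only: mmiowb_spin_unlock() fires mmiowb() solely when a
 * writeX() marked an I/O write as pending inside this critical
 * section; lock-only sections pay nothing.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	mmiowb_spin_unlock();
	smp_store_release(&lock->locked, 0);	/* hypothetical lock word */
}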
Cc: Yoshinori Sato Cc: Rich Felker Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/sh/include/asm/Kbuild | 1 - arch/sh/include/asm/io.h | 3 --- arch/sh/include/asm/mmiowb.h | 12 ++++++++++++ arch/sh/include/asm/spinlock-llsc.h | 2 ++ 4 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 arch/sh/include/asm/mmiowb.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 162c9054561f5..7bf2cb680d328 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -14,7 +14,6 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mmiowb.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 4f7f235f15f85..c28e37a344adc 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -229,9 +229,6 @@ __BUILD_IOPORT_STRING(q, u64) #define IO_SPACE_LIMIT 0xffffffff -/* synco on SH-4A, otherwise a nop */ -#define mmiowb() wmb() - /* We really want to try and get these to memcpy etc */ void memcpy_fromio(void *, const volatile void __iomem *, unsigned long); void memcpy_toio(volatile void __iomem *, const void *, unsigned long); diff --git a/arch/sh/include/asm/mmiowb.h b/arch/sh/include/asm/mmiowb.h new file mode 100644 index 0000000000000..535d59735f1d8 --- /dev/null +++ b/arch/sh/include/asm/mmiowb.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_SH_MMIOWB_H +#define __ASM_SH_MMIOWB_H + +#include + +/* synco on SH-4A, otherwise a nop */ +#define mmiowb() wmb() + +#include + +#endif /* __ASM_SH_MMIOWB_H */ diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h index 786ee0fde3b01..7fd929cd2e7a0 100644 --- a/arch/sh/include/asm/spinlock-llsc.h +++ b/arch/sh/include/asm/spinlock-llsc.h @@ -47,6 +47,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned long tmp; + /* This could be optimised with ARCH_HAS_MMIOWB */ + mmiowb(); __asm__ __volatile__ ( "mov #1, %0 ! arch_spin_unlock \n\t" "mov.l %0, @%1 \n\t" -- GitLab From 346e91ee090b07da8d15e36bc3169ddea6968713 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:37:21 +0000 Subject: [PATCH 0768/1861] mips/mmiowb: Add unconditional mmiowb() to arch_spin_unlock() The mmiowb() macro is horribly difficult to use and drivers will continue to work most of the time if they omit a call when it is required. Rather than rely on driver authors getting this right, push mmiowb() into arch_spin_unlock() for mips. If this is deemed to be a performance issue, a subsequent optimisation could make use of ARCH_HAS_MMIOWB to elide the barrier in cases where no I/O writes were performed inside the critical section. 
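The bug class being closed off is the classic driver pattern below (illustrative only; the register, lock and device are stand-ins, not code from this series): without a barrier in the unlock, the next CPU to take the lock can issue an MMIO write that reaches the device before this one's.

spin_lock(&dev->hw_lock);
writel(cmd, dev->regs + CMD_REG);	/* posted MMIO write */
spin_unlock(&dev->hw_lock);		/* barrier here keeps it ordered
					 * against the next holder's MMIO */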
Acked-by: Paul Burton Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/mips/include/asm/Kbuild | 1 - arch/mips/include/asm/io.h | 3 --- arch/mips/include/asm/mmiowb.h | 11 +++++++++++ arch/mips/include/asm/spinlock.h | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 arch/mips/include/asm/mmiowb.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index bf39c2253ec8d..87b86cdf126a9 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mmiowb.h generic-y += msi.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 845fbbc7a2e34..29997e42480e9 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -102,9 +102,6 @@ static inline void set_io_port_base(unsigned long base) #define iobarrier_w() wmb() #define iobarrier_sync() iob() -/* Some callers use this older API instead. */ -#define mmiowb() iobarrier_w() - /* * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/mips/include/asm/mmiowb.h b/arch/mips/include/asm/mmiowb.h new file mode 100644 index 0000000000000..a40824e3ef8e3 --- /dev/null +++ b/arch/mips/include/asm/mmiowb.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_MMIOWB_H +#define _ASM_MMIOWB_H + +#include + +#define mmiowb() iobarrier_w() + +#include + +#endif /* _ASM_MMIOWB_H */ diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index ee81297d9117e..8a88eb2655160 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -11,6 +11,21 @@ #include #include + +#include + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + */ +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + /* This could be optimised with ARCH_HAS_MMIOWB */ + mmiowb(); + smp_store_release(&lock->locked, 0); +} + #include #endif /* _ASM_SPINLOCK_H */ -- GitLab From 49ca6462fc9e0f5a67cd96eeddd844efc3fb33b9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 13:37:21 +0000 Subject: [PATCH 0769/1861] ia64/mmiowb: Add unconditional mmiowb() to arch_spin_unlock() The mmiowb() macro is horribly difficult to use and drivers will continue to work most of the time if they omit a call when it is required. Rather than rely on driver authors getting this right, push mmiowb() into arch_spin_unlock() for ia64. If this is deemed to be a performance issue, a subsequent optimisation could make use of ARCH_HAS_MMIOWB to elide the barrier in cases where no I/O writes were performed inside the critical section. 
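Taken together with the sh and mips patches above, the per-arch recipe follows the steps listed in the asm-generic/mmiowb.h comment; for a hypothetical architecture it amounts to roughly the header below, plus a 'select ARCH_HAS_MMIOWB' in its Kconfig if it opts in to the generic tracking (all names here are invented for the sketch).

/* arch/foo/include/asm/mmiowb.h */
#ifndef _ASM_FOO_MMIOWB_H
#define _ASM_FOO_MMIOWB_H

#define mmiowb()	wmb()	/* arch-specific I/O write barrier */

#include <asm-generic/mmiowb.h>

#endif /* _ASM_FOO_MMIOWB_H */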
Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/ia64/include/asm/Kbuild | 1 - arch/ia64/include/asm/io.h | 17 ----------------- arch/ia64/include/asm/mmiowb.h | 25 +++++++++++++++++++++++++ arch/ia64/include/asm/spinlock.h | 2 ++ 4 files changed, 27 insertions(+), 18 deletions(-) create mode 100644 arch/ia64/include/asm/mmiowb.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index cabfe0280c33d..11f191689c9e8 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mmiowb.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 1e6fef69bb01c..a511d62d447ac 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -113,20 +113,6 @@ extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count); */ #define __ia64_mf_a() ia64_mfa() -/** - * ___ia64_mmiowb - I/O write barrier - * - * Ensure ordering of I/O space writes. This will make sure that writes - * following the barrier will arrive after all previous writes. For most - * ia64 platforms, this is a simple 'mf.a' instruction. - * - * See Documentation/driver-api/device-io.rst for more information. - */ -static inline void ___ia64_mmiowb(void) -{ - ia64_mfa(); -} - static inline void* __ia64_mk_io_addr (unsigned long port) { @@ -161,7 +147,6 @@ __ia64_mk_io_addr (unsigned long port) #define __ia64_writew ___ia64_writew #define __ia64_writel ___ia64_writel #define __ia64_writeq ___ia64_writeq -#define __ia64_mmiowb ___ia64_mmiowb /* * For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure @@ -296,7 +281,6 @@ __outsl (unsigned long port, const void *src, unsigned long count) #define __outb platform_outb #define __outw platform_outw #define __outl platform_outl -#define __mmiowb platform_mmiowb #define inb(p) __inb(p) #define inw(p) __inw(p) @@ -310,7 +294,6 @@ __outsl (unsigned long port, const void *src, unsigned long count) #define outsb(p,s,c) __outsb(p,s,c) #define outsw(p,s,c) __outsw(p,s,c) #define outsl(p,s,c) __outsl(p,s,c) -#define mmiowb() __mmiowb() /* * The address passed to these functions are ioremap()ped already. diff --git a/arch/ia64/include/asm/mmiowb.h b/arch/ia64/include/asm/mmiowb.h new file mode 100644 index 0000000000000..297b85ac84a01 --- /dev/null +++ b/arch/ia64/include/asm/mmiowb.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_IA64_MMIOWB_H +#define _ASM_IA64_MMIOWB_H + +#include + +/** + * ___ia64_mmiowb - I/O write barrier + * + * Ensure ordering of I/O space writes. This will make sure that writes + * following the barrier will arrive after all previous writes. For most + * ia64 platforms, this is a simple 'mf.a' instruction. 
+ */ +static inline void ___ia64_mmiowb(void) +{ + ia64_mfa(); +} + +#define __ia64_mmiowb ___ia64_mmiowb +#define mmiowb() platform_mmiowb() + +#include + +#endif /* _ASM_IA64_MMIOWB_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index afd0b3121b4c9..5f620e66384e3 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -73,6 +73,8 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; + /* This could be optimised with ARCH_HAS_MMIOWB */ + mmiowb(); asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); WRITE_ONCE(*p, (tmp + 2) & ~1); } -- GitLab From 420af1554790a95e6813f56f63b6d2361614082b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 14:45:42 +0000 Subject: [PATCH 0770/1861] powerpc/mmiowb: Hook up mmiowb() implementation to asm-generic code In a bid to kill off explicit mmiowb() usage in driver code, hook up the asm-generic mmiowb() tracking code but provide a definition of arch_mmiowb_state() so that the tracking data can remain in the paca as it does at present. This replaces the existing (flawed) implementation. Acked-by: Michael Ellerman Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/io.h | 33 +++-------------------------- arch/powerpc/include/asm/mmiowb.h | 20 +++++++++++++++++ arch/powerpc/include/asm/paca.h | 6 +++++- arch/powerpc/include/asm/spinlock.h | 17 --------------- arch/powerpc/xmon/xmon.c | 5 ++++- 7 files changed, 33 insertions(+), 50 deletions(-) create mode 100644 arch/powerpc/include/asm/mmiowb.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d0be82c30619..5e3d0853c31d2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -132,6 +132,7 @@ config PPC select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV + select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_PTE_SPECIAL diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 74b6605ca55fc..a0c132bedfae8 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -7,7 +7,6 @@ generic-y += export.h generic-y += irq_regs.h generic-y += local64.h generic-y += mcs_spinlock.h -generic-y += mmiowb.h generic-y += preempt.h generic-y += rwsem.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 4b73847e9b955..1fad67b464092 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -34,14 +34,11 @@ extern struct pci_dev *isa_bridge_pcidev; #include #include #include +#include #include #include #include -#ifdef CONFIG_PPC64 -#include -#endif - #define SIO_CONFIG_RA 0x398 #define SIO_CONFIG_RD 0x399 @@ -107,12 +104,6 @@ extern bool isa_io_special; * */ -#ifdef CONFIG_PPC64 -#define IO_SET_SYNC_FLAG() do { local_paca->io_sync = 1; } while(0) -#else -#define IO_SET_SYNC_FLAG() -#endif - #define DEF_MMIO_IN_X(name, size, insn) \ static inline u##size name(const volatile u##size __iomem *addr) \ { \ @@ -127,7 +118,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \ { \ __asm__ __volatile__("sync;"#insn" %1,%y0" \ : "=Z" (*addr) : "r" (val) : "memory"); \ - IO_SET_SYNC_FLAG(); \ + mmiowb_set_pending(); \ } #define DEF_MMIO_IN_D(name, size, insn) \ @@ -144,7 +135,7 @@ static inline void name(volatile
u##size __iomem *addr, u##size val) \ { \ __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \ : "=m" (*addr) : "r" (val) : "memory"); \ - IO_SET_SYNC_FLAG(); \ + mmiowb_set_pending(); \ } DEF_MMIO_IN_D(in_8, 8, lbz); @@ -652,24 +643,6 @@ static inline void name at \ #include -#ifdef CONFIG_PPC32 -#define mmiowb() -#else -/* - * Enforce synchronisation of stores vs. spin_unlock - * (this does it explicitly, though our implementation of spin_unlock - * does it implicitely too) - */ -static inline void mmiowb(void) -{ - unsigned long tmp; - - __asm__ __volatile__("sync; li %0,0; stb %0,%1(13)" - : "=&r" (tmp) : "i" (offsetof(struct paca_struct, io_sync)) - : "memory"); -} -#endif /* !CONFIG_PPC32 */ - static inline void iosync(void) { __asm__ __volatile__ ("sync" : : : "memory"); diff --git a/arch/powerpc/include/asm/mmiowb.h b/arch/powerpc/include/asm/mmiowb.h new file mode 100644 index 0000000000000..b10180613507c --- /dev/null +++ b/arch/powerpc/include/asm/mmiowb.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_MMIOWB_H +#define _ASM_POWERPC_MMIOWB_H + +#ifdef CONFIG_MMIOWB + +#include +#include +#include + +#define arch_mmiowb_state() (&local_paca->mmiowb_state) +#define mmiowb() mb() + +#else +#define mmiowb() do { } while (0) +#endif /* CONFIG_MMIOWB */ + +#include + +#endif /* _ASM_POWERPC_MMIOWB_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index e843bc5d1a0f2..134e912d403fb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -34,6 +34,8 @@ #include #include +#include + register struct paca_struct *local_paca asm("r13"); #if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP) @@ -171,7 +173,6 @@ struct paca_struct { u16 trap_save; /* Used when bad stack is encountered */ u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ - u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -264,6 +265,9 @@ struct paca_struct { #ifdef CONFIG_STACKPROTECTOR unsigned long canary; #endif +#ifdef CONFIG_MMIOWB + struct mmiowb_state mmiowb_state; +#endif } ____cacheline_aligned; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 685c72310f5d0..15b39c407c4e9 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -39,19 +39,6 @@ #define LOCK_TOKEN 1 #endif -#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) -#define CLEAR_IO_SYNC (get_paca()->io_sync = 0) -#define SYNC_IO do { \ - if (unlikely(get_paca()->io_sync)) { \ - mb(); \ - get_paca()->io_sync = 0; \ - } \ - } while (0) -#else -#define CLEAR_IO_SYNC -#define SYNC_IO -#endif - #ifdef CONFIG_PPC_PSERIES #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) @@ -99,7 +86,6 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) static inline int arch_spin_trylock(arch_spinlock_t *lock) { - CLEAR_IO_SYNC; return __arch_spin_trylock(lock) == 0; } @@ -130,7 +116,6 @@ extern void __rw_yield(arch_rwlock_t *lock); static inline void arch_spin_lock(arch_spinlock_t *lock) { - CLEAR_IO_SYNC; while (1) { if (likely(__arch_spin_trylock(lock) == 0)) break; @@ -148,7 +133,6 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { unsigned long flags_dis; 
- CLEAR_IO_SYNC; while (1) { if (likely(__arch_spin_trylock(lock) == 0)) break; @@ -167,7 +151,6 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - SYNC_IO; __asm__ __volatile__("# arch_spin_unlock\n\t" PPC_RELEASE_BARRIER: : :"memory"); lock->slock = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a0f44f9923608..13c6a47e61505 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2429,7 +2429,10 @@ static void dump_one_paca(int cpu) DUMP(p, trap_save, "%#-*x"); DUMP(p, irq_soft_mask, "%#-*x"); DUMP(p, irq_happened, "%#-*x"); - DUMP(p, io_sync, "%#-*x"); +#ifdef CONFIG_MMIOWB + DUMP(p, mmiowb_state.nesting_count, "%#-*x"); + DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x"); +#endif DUMP(p, irq_work_pending, "%#-*x"); DUMP(p, nap_state_lost, "%#-*x"); DUMP(p, sprg_vdso, "%#-*llx"); -- GitLab From b012980d1c6e27f5c4adf0c19defca8658956820 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 14:45:42 +0000 Subject: [PATCH 0771/1861] riscv/mmiowb: Hook up mmiowb() implementation to asm-generic code In a bid to kill off explicit mmiowb() usage in driver code, hook up the asm-generic mmiowb() tracking code for riscv, so that an mmiowb() is automatically issued from spin_unlock() if an I/O write was performed in the critical section. Reviewed-by: Palmer Dabbelt Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/Kbuild | 1 - arch/riscv/include/asm/io.h | 15 ++------------- arch/riscv/include/asm/mmiowb.h | 14 ++++++++++++++ 4 files changed, 17 insertions(+), 14 deletions(-) create mode 100644 arch/riscv/include/asm/mmiowb.h diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index eb56c82d8aa14..6e30e8126799f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -48,6 +48,7 @@ config RISCV select RISCV_TIMER select GENERIC_IRQ_MULTI_HANDLER select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_MMIOWB select HAVE_EBPF_JIT if 64BIT config MMU diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 221cd2ec78a4a..cccd12cf27d44 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h -generic-y += mmiowb.h generic-y += mutex.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 1d9c1376dc642..744fd92e77bc7 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -20,6 +20,7 @@ #define _ASM_RISCV_IO_H #include +#include extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -99,18 +100,6 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) } #endif -/* - * FIXME: I'm flip-flopping on whether or not we should keep this or enforce - * the ordering with I/O on spinlocks like PowerPC does. The worry is that - * drivers won't get this correct, but I also don't want to introduce a fence - * into the lock code that otherwise only uses AMOs (and is essentially defined - * by the ISA to be correct). For now I'm leaving this here: "o,w" is - * sufficient to ensure that all writes to the device have completed before the - * write to the spinlock is allowed to commit. I surmised this from reading - * "ACQUIRES VS I/O ACCESSES" in memory-barriers.txt.
- */ -#define mmiowb() __asm__ __volatile__ ("fence o,w" : : : "memory"); - /* * Unordered I/O memory access primitives. These are even more relaxed than * the relaxed versions, as they don't even order accesses between successive @@ -165,7 +154,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #define __io_br() do {} while (0) #define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory"); #define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory"); -#define __io_aw() do {} while (0) +#define __io_aw() mmiowb_set_pending() #define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; }) #define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; }) diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h new file mode 100644 index 0000000000000..5d7e3a2b4e3b2 --- /dev/null +++ b/arch/riscv/include/asm/mmiowb.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_RISCV_MMIOWB_H +#define _ASM_RISCV_MMIOWB_H + +/* + * "o,w" is sufficient to ensure that all writes to the device have completed + * before the write to the spinlock is allowed to commit. + */ +#define mmiowb() __asm__ __volatile__ ("fence o,w" : : : "memory"); + +#include + +#endif /* ASM_RISCV_MMIOWB_H */ -- GitLab From 915530396c788d75c40f200edd67b56ac363c728 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 16:17:54 +0000 Subject: [PATCH 0772/1861] Documentation: Kill all references to mmiowb() The guarantees provided by mmiowb() are now provided implicitly by spin_unlock(), so remove all references to this most confusing of barriers from our Documentation. Good riddance. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- Documentation/driver-api/device-io.rst | 45 ----------- Documentation/driver-api/pci/p2pdma.rst | 4 - Documentation/memory-barriers.txt | 103 +----------------------- 3 files changed, 4 insertions(+), 148 deletions(-) diff --git a/Documentation/driver-api/device-io.rst b/Documentation/driver-api/device-io.rst index b00b239030788..0e389378f71d5 100644 --- a/Documentation/driver-api/device-io.rst +++ b/Documentation/driver-api/device-io.rst @@ -103,51 +103,6 @@ continuing execution:: ha->flags.ints_enabled = 0; } -In addition to write posting, on some large multiprocessing systems -(e.g. SGI Challenge, Origin and Altix machines) posted writes won't be -strongly ordered coming from different CPUs. Thus it's important to -properly protect parts of your driver that do memory-mapped writes with -locks and use the :c:func:`mmiowb()` to make sure they arrive in the -order intended. Issuing a regular readX() will also ensure write ordering, -but should only be used when the -driver has to be sure that the write has actually arrived at the device -(not that it's simply ordered with respect to other writes), since a -full readX() is a relatively expensive operation. - -Generally, one should use :c:func:`mmiowb()` prior to releasing a spinlock -that protects regions using :c:func:`writeb()` or similar functions that -aren't surrounded by readb() calls, which will ensure ordering -and flushing. The following pseudocode illustrates what might occur if -write ordering isn't guaranteed via :c:func:`mmiowb()` or one of the -readX() functions:: - - CPU A: spin_lock_irqsave(&dev_lock, flags) - CPU A: ... - CPU A: writel(newval, ring_ptr); - CPU A: spin_unlock_irqrestore(&dev_lock, flags) - ... - CPU B: spin_lock_irqsave(&dev_lock, flags) - CPU B: writel(newval2, ring_ptr); - CPU B: ... 
- CPU B: spin_unlock_irqrestore(&dev_lock, flags) - -In the case above, newval2 could be written to ring_ptr before newval. -Fixing it is easy though:: - - CPU A: spin_lock_irqsave(&dev_lock, flags) - CPU A: ... - CPU A: writel(newval, ring_ptr); - CPU A: mmiowb(); /* ensure no other writes beat us to the device */ - CPU A: spin_unlock_irqrestore(&dev_lock, flags) - ... - CPU B: spin_lock_irqsave(&dev_lock, flags) - CPU B: writel(newval2, ring_ptr); - CPU B: ... - CPU B: mmiowb(); - CPU B: spin_unlock_irqrestore(&dev_lock, flags) - -See tg3.c for a real world example of how to use :c:func:`mmiowb()` - PCI ordering rules also guarantee that PIO read responses arrive after any outstanding DMA writes from that bus, since for some devices the result of a readb() call may signal to the driver that a DMA transaction is diff --git a/Documentation/driver-api/pci/p2pdma.rst b/Documentation/driver-api/pci/p2pdma.rst index 6d85b5a2598db..44deb52beeb47 100644 --- a/Documentation/driver-api/pci/p2pdma.rst +++ b/Documentation/driver-api/pci/p2pdma.rst @@ -132,10 +132,6 @@ precludes passing these pages to userspace. P2P memory is also technically IO memory but should never have any side effects behind it. Thus, the order of loads and stores should not be important and ioreadX(), iowriteX() and friends should not be necessary. -However, as the memory is not cache coherent, if access ever needs to -be protected by a spinlock then :c:func:`mmiowb()` must be used before -unlocking the lock. (See ACQUIRES VS I/O ACCESSES in -Documentation/memory-barriers.txt) P2P DMA Support Library diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 5eb6f4c6a1333..3522f0cc772f9 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1937,21 +1937,6 @@ There are some more advanced barrier functions: information on consistent memory. -MMIO WRITE BARRIER ------------------- - -The Linux kernel also has a special barrier for use with memory-mapped I/O -writes: - - mmiowb(); - -This is a variation on the mandatory write barrier that causes writes to weakly -ordered I/O regions to be partially ordered. Its effects may go beyond the -CPU->Hardware interface and actually affect the hardware at some level. - -See the subsection "Acquires vs I/O accesses" for more information. - - =============================== IMPLICIT KERNEL MEMORY BARRIERS =============================== @@ -2317,75 +2302,6 @@ But it won't see any of: *E, *F or *G following RELEASE Q - -ACQUIRES VS I/O ACCESSES ------------------------- - -Under certain circumstances (especially involving NUMA), I/O accesses within -two spinlocked sections on two different CPUs may be seen as interleaved by the -PCI bridge, because the PCI bridge does not necessarily participate in the -cache-coherence protocol, and is therefore incapable of issuing the required -read memory barriers. - -For example: - - CPU 1 CPU 2 - =============================== =============================== - spin_lock(Q) - writel(0, ADDR) - writel(1, DATA); - spin_unlock(Q); - spin_lock(Q); - writel(4, ADDR); - writel(5, DATA); - spin_unlock(Q); - -may be seen by the PCI bridge as follows: - - STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5 - -which would probably cause the hardware to malfunction. 
- - -What is necessary here is to intervene with an mmiowb() before dropping the -spinlock, for example: - - CPU 1 CPU 2 - =============================== =============================== - spin_lock(Q) - writel(0, ADDR) - writel(1, DATA); - mmiowb(); - spin_unlock(Q); - spin_lock(Q); - writel(4, ADDR); - writel(5, DATA); - mmiowb(); - spin_unlock(Q); - -this will ensure that the two stores issued on CPU 1 appear at the PCI bridge -before either of the stores issued on CPU 2. - - -Furthermore, following a store by a load from the same device obviates the need -for the mmiowb(), because the load forces the store to complete before the load -is performed: - - CPU 1 CPU 2 - =============================== =============================== - spin_lock(Q) - writel(0, ADDR) - a = readl(DATA); - spin_unlock(Q); - spin_lock(Q); - writel(4, ADDR); - b = readl(DATA); - spin_unlock(Q); - - -See Documentation/driver-api/device-io.rst for more information. - - ================================= WHERE ARE MEMORY BARRIERS NEEDED? ================================= @@ -2532,16 +2448,9 @@ the device to malfunction. Inside of the Linux kernel, I/O should be done through the appropriate accessor routines - such as inb() or writel() - which know how to make such accesses appropriately sequential. While this, for the most part, renders the explicit -use of memory barriers unnecessary, there are a couple of situations where they -might be needed: - - (1) On some systems, I/O stores are not strongly ordered across all CPUs, and - so for _all_ general drivers locks should be used and mmiowb() must be - issued prior to unlocking the critical section. - - (2) If the accessor functions are used to refer to an I/O memory window with - relaxed memory access properties, then _mandatory_ memory barriers are - required to enforce ordering. +use of memory barriers unnecessary, if the accessor functions are used to refer +to an I/O memory window with relaxed memory access properties, then _mandatory_ +memory barriers are required to enforce ordering. See Documentation/driver-api/device-io.rst for more information. @@ -2586,8 +2495,7 @@ explicit barriers are used. Normally this won't be a problem because the I/O accesses done inside such sections will include synchronous load operations on strictly ordered I/O -registers that form implicit I/O barriers. If this isn't sufficient then an -mmiowb() may need to be used explicitly. +registers that form implicit I/O barriers. A similar situation may occur between an interrupt routine and two routines @@ -2687,9 +2595,6 @@ guarantees: All of these accessors assume that the underlying peripheral is little-endian, and will therefore perform byte-swapping operations on big-endian architectures. -Composing I/O ordering barriers with SMP ordering barriers and LOCK/UNLOCK -operations is a dangerous sport which may require the use of mmiowb(). See the -subsection "Acquires vs I/O accesses" for more information. ======================================== ASSUMED MINIMUM EXECUTION ORDERING MODEL -- GitLab From 949b8c72768e3a7c69d270962b8a142ee8deec1b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 16:56:31 +0000 Subject: [PATCH 0773/1861] drivers: Remove useless trailing comments from mmiowb() invocations In preparation for using coccinelle to remove all mmiowb() instances from drivers, remove all trailing comments since they won't be picked up by spatch later on and will end up being preserved in the code. 
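As a concrete example of the problem (taking a pattern from one of the hunks below): running the removal rule from the next patch over

	mmiowb(); /* keep prod updates ordered */

would delete only the call and leave a stale

	/* keep prod updates ordered */

behind in the source, which is why the trailing comments have to go first.
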
Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/scsi/bnx2i/bnx2i_hwi.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 612f04190ed83..12e67a91e5781 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8365,7 +8365,7 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd) struct hfi1_devdata *dd = rcd->dd; u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg); - mmiowb(); /* make sure everything before is written */ + mmiowb(); write_csr(dd, addr, rcd->imask); /* force the above write on the chip and get a value back */ (void)read_csr(dd, addr); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 59ad4202422c1..4dab2b5ffb0ea 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -3700,7 +3700,7 @@ int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (rdma_protocol_iwarp(&dev->ibdev, 1)) { writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); - mmiowb(); /* for second doorbell */ + mmiowb(); } wr = wr->next; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 2462e7aa0c5d3..1ed068509337c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -527,7 +527,7 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp, REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4, ((u32 *)&rx_prods)[i]); - mmiowb(); /* keep prod updates ordered */ + mmiowb(); DP(NETIF_MSG_RX_STATUS, "queue[%d]: wrote bd_prod %u cqe_prod %u sge_prod %u\n", diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 626b491f7674f..e46786a56b0ca 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -5244,7 +5244,7 @@ static void bnx2x_update_eq_prod(struct bnx2x *bp, u16 prod) { /* No memory barriers */ storm_memset_eq_prod(bp, prod, BP_FUNC(bp)); - mmiowb(); /* keep prod updates ordered */ + mmiowb(); } static int bnx2x_cnic_handle_cfc_del(struct bnx2x *bp, u32 cid, diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index fae6f71e677d7..d56a78f411cd3 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -280,7 +280,7 @@ static void bnx2i_ring_sq_dbell(struct bnx2i_conn *bnx2i_conn, int count) } else writew(count, ep->qp.ctx_base + CNIC_SEND_DOORBELL); - mmiowb(); /* flush posted PCI writes */ + mmiowb(); } -- GitLab From fb24ea52f78e0d595852e09e3a55697c8f442189 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:14:59 +0000 Subject: [PATCH 0774/1861] drivers: Remove explicit invocations of mmiowb() mmiowb() is now implied by spin_unlock() on architectures that require it, so there is no reason to call it from driver code. This patch was generated using coccinelle: @mmiowb@ @@ - mmiowb(); and invoked as: $ for d in drivers include/linux/qed sound; do \ spatch --include-headers --sp-file mmiowb.cocci --dir $d --in-place; done NOTE: mmiowb() has only ever guaranteed ordering in conjunction with spin_unlock(). 
However, pairing each mmiowb() removal in this patch with the corresponding call to spin_unlock() is not at all trivial, so there is a small chance that this change may regress any drivers incorrectly relying on mmiowb() to order MMIO writes between CPUs using lock-free synchronisation. If you've ended up bisecting to this commit, you can reintroduce the mmiowb() calls using wmb() instead, which should restore the old behaviour on all architectures other than some esoteric ia64 systems. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 4 --- drivers/dma/txx9dmac.c | 3 -- drivers/firewire/ohci.c | 1 - drivers/gpu/drm/i915/intel_hdmi.c | 10 ------- drivers/ide/tx4939ide.c | 2 -- drivers/infiniband/hw/hfi1/chip.c | 3 -- drivers/infiniband/hw/hfi1/pio.c | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 -- drivers/infiniband/hw/mlx4/qp.c | 6 ---- drivers/infiniband/hw/mlx5/qp.c | 1 - drivers/infiniband/hw/mthca/mthca_cmd.c | 6 ---- drivers/infiniband/hw/mthca/mthca_cq.c | 5 ---- drivers/infiniband/hw/mthca/mthca_qp.c | 17 ----------- drivers/infiniband/hw/mthca/mthca_srq.c | 6 ---- drivers/infiniband/hw/qedr/verbs.c | 12 -------- drivers/infiniband/hw/qib/qib_iba6120.c | 4 --- drivers/infiniband/hw/qib/qib_iba7220.c | 3 -- drivers/infiniband/hw/qib/qib_iba7322.c | 3 -- drivers/infiniband/hw/qib/qib_sd7220.c | 4 --- drivers/media/pci/dt3155/dt3155.c | 8 ----- drivers/memstick/host/jmb38x_ms.c | 4 --- drivers/misc/ioc4.c | 2 -- drivers/misc/mei/hw-me.c | 3 -- drivers/misc/tifm_7xx1.c | 1 - drivers/mmc/host/alcor.c | 1 - drivers/mmc/host/sdhci.c | 13 --------- drivers/mmc/host/tifm_sd.c | 3 -- drivers/mmc/host/via-sdmmc.c | 10 ------- drivers/mtd/nand/raw/r852.c | 2 -- drivers/mtd/nand/raw/txx9ndfmc.c | 1 - drivers/net/ethernet/aeroflex/greth.c | 1 - drivers/net/ethernet/alacritech/slicoss.c | 4 --- drivers/net/ethernet/amazon/ena/ena_com.c | 1 - drivers/net/ethernet/atheros/atlx/atl1.c | 1 - drivers/net/ethernet/atheros/atlx/atl2.c | 1 - drivers/net/ethernet/broadcom/bnx2.c | 4 --- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 -- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 4 --- .../ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 1 - .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 29 ------------------- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 1 - .../net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 -- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 4 --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 -- drivers/net/ethernet/broadcom/tg3.c | 6 ---- .../ethernet/cavium/liquidio/cn66xx_device.c | 10 ------- .../ethernet/cavium/liquidio/octeon_device.c | 1 - .../ethernet/cavium/liquidio/octeon_droq.c | 4 --- .../cavium/liquidio/request_manager.c | 1 - drivers/net/ethernet/intel/e1000/e1000_main.c | 5 ---- drivers/net/ethernet/intel/e1000e/netdev.c | 7 ----- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 2 -- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 5 ---- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 5 ---- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 5 ---- drivers/net/ethernet/intel/ice/ice_txrx.c | 5 ---- drivers/net/ethernet/intel/igb/igb_main.c | 5 ---- drivers/net/ethernet/intel/igbvf/netdev.c | 4 --- drivers/net/ethernet/intel/igc/igc_main.c | 5 ---- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 ---- drivers/net/ethernet/marvell/sky2.c | 4 --- drivers/net/ethernet/mellanox/mlx4/catas.c | 4 --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 13 --------- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 - 
.../net/ethernet/myricom/myri10ge/myri10ge.c | 2 -- drivers/net/ethernet/neterion/s2io.c | 2 -- .../net/ethernet/neterion/vxge/vxge-main.c | 5 ---- .../net/ethernet/neterion/vxge/vxge-traffic.c | 4 --- drivers/net/ethernet/qlogic/qed/qed_int.c | 13 --------- drivers/net/ethernet/qlogic/qed/qed_spq.c | 3 -- .../net/ethernet/qlogic/qede/qede_ethtool.c | 8 ----- drivers/net/ethernet/qlogic/qede/qede_fp.c | 8 ----- drivers/net/ethernet/qlogic/qla3xxx.c | 1 - drivers/net/ethernet/qlogic/qlge/qlge.h | 1 - drivers/net/ethernet/qlogic/qlge/qlge_main.c | 1 - drivers/net/ethernet/renesas/ravb_main.c | 9 ------ drivers/net/ethernet/renesas/ravb_ptp.c | 3 -- drivers/net/ethernet/renesas/sh_eth.c | 1 - drivers/net/ethernet/sfc/falcon/io.h | 2 -- drivers/net/ethernet/sfc/io.h | 2 -- drivers/net/ethernet/silan/sc92031.c | 14 --------- drivers/net/ethernet/via/via-rhine.c | 3 -- drivers/net/ethernet/wiznet/w5100.c | 6 ---- drivers/net/ethernet/wiznet/w5300.c | 15 ---------- drivers/net/wireless/ath/ath5k/base.c | 4 --- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 2 -- drivers/net/wireless/broadcom/b43/main.c | 7 ----- drivers/net/wireless/broadcom/b43/sysfs.c | 1 - drivers/net/wireless/broadcom/b43legacy/ilt.c | 2 -- .../net/wireless/broadcom/b43legacy/main.c | 20 ------------- drivers/net/wireless/broadcom/b43legacy/phy.c | 1 - drivers/net/wireless/broadcom/b43legacy/pio.h | 1 - .../net/wireless/broadcom/b43legacy/radio.c | 4 --- .../net/wireless/broadcom/b43legacy/sysfs.c | 1 - drivers/net/wireless/intel/iwlegacy/common.h | 7 ----- .../net/wireless/intel/iwlwifi/pcie/trans.c | 1 - drivers/ntb/hw/idt/ntb_hw_idt.c | 7 ----- drivers/ntb/test/ntb_perf.c | 3 -- drivers/scsi/bfa/bfa.h | 3 +- drivers/scsi/bfa/bfa_hw_cb.c | 2 -- drivers/scsi/bfa/bfa_hw_ct.c | 2 -- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 2 -- drivers/scsi/bnx2i/bnx2i_hwi.c | 3 -- drivers/scsi/megaraid/megaraid_sas_base.c | 1 - drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 - drivers/scsi/mpt3sas/mpt3sas_base.c | 1 - drivers/scsi/qedf/qedf_io.c | 1 - drivers/scsi/qedi/qedi_fw.c | 1 - drivers/scsi/qla1280.c | 5 ---- drivers/ssb/pci.c | 1 - drivers/ssb/pcmcia.c | 4 --- drivers/staging/comedi/drivers/mite.c | 3 -- drivers/staging/comedi/drivers/ni_660x.c | 2 -- .../staging/comedi/drivers/ni_mio_common.c | 1 - drivers/staging/comedi/drivers/ni_pcidio.c | 2 -- drivers/staging/comedi/drivers/ni_tio.c | 1 - drivers/staging/comedi/drivers/s626.c | 2 -- drivers/tty/serial/men_z135_uart.c | 1 - drivers/tty/serial/serial_txx9.c | 1 - drivers/usb/early/xhci-dbc.c | 4 --- drivers/usb/host/xhci-dbgcap.c | 2 -- include/linux/qed/qed_if.h | 2 -- sound/soc/txx9/txx9aclc-ac97.c | 1 - 123 files changed, 1 insertion(+), 508 deletions(-) diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index 4c97478d44bd2..5826c2c98a507 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -303,8 +303,6 @@ static void post_se_instr(struct nitrox_softreq *sr, /* Ring doorbell with count 1 */ writeq(1, cmdq->dbell_csr_addr); - /* orders the doorbell rings */ - mmiowb(); cmdq->write_idx = incr_index(idx, 1, ndev->qlen); @@ -599,8 +597,6 @@ void pkt_slc_resp_tasklet(unsigned long data) * MSI-X interrupt generates if Completion count > Threshold */ writeq(slc_cnts.value, cmdq->compl_cnt_csr_addr); - /* order the writes */ - mmiowb(); if (atomic_read(&cmdq->backlog_count)) schedule_work(&cmdq->backlog_qflush); diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 
eb45af71d3a34..e8d0881b64d89 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -327,7 +327,6 @@ static void txx9dmac_reset_chan(struct txx9dmac_chan *dc) channel_writel(dc, SAIR, 0); channel_writel(dc, DAIR, 0); channel_writel(dc, CCR, 0); - mmiowb(); } /* Called with dc->lock held and bh disabled */ @@ -954,7 +953,6 @@ static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc, dma_sync_single_for_device(chan2parent(&dc->chan), prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - mmiowb(); if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) && channel_read_CHAR(dc) == prev->txd.phys) /* Restart chain DMA */ @@ -1080,7 +1078,6 @@ static void txx9dmac_free_chan_resources(struct dma_chan *chan) static void txx9dmac_off(struct txx9dmac_dev *ddev) { dma_writel(ddev, MCR, 0); - mmiowb(); } static int __init txx9dmac_chan_probe(struct platform_device *pdev) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 45c048751f3bd..7183ab34269eb 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2939,7 +2939,6 @@ static void set_multichannel_mask(struct fw_ohci *ohci, u64 channels) reg_write(ohci, OHCI1394_IRMultiChanMaskLoClear, ~lo); reg_write(ohci, OHCI1394_IRMultiChanMaskHiSet, hi); reg_write(ohci, OHCI1394_IRMultiChanMaskLoSet, lo); - mmiowb(); ohci->mc_channels = channels; } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f125a62eba8cf..a46bffe2b288f 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -182,7 +182,6 @@ static void g4x_write_infoframe(struct intel_encoder *encoder, I915_WRITE(VIDEO_DIP_CTL, val); - mmiowb(); for (i = 0; i < len; i += 4) { I915_WRITE(VIDEO_DIP_DATA, *data); data++; @@ -190,7 +189,6 @@ static void g4x_write_infoframe(struct intel_encoder *encoder, /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) I915_WRITE(VIDEO_DIP_DATA, 0); - mmiowb(); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -237,7 +235,6 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, I915_WRITE(reg, val); - mmiowb(); for (i = 0; i < len; i += 4) { I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data); data++; @@ -245,7 +242,6 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), 0); - mmiowb(); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -298,7 +294,6 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, I915_WRITE(reg, val); - mmiowb(); for (i = 0; i < len; i += 4) { I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data); data++; @@ -306,7 +301,6 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), 0); - mmiowb(); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -352,7 +346,6 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, I915_WRITE(reg, val); - mmiowb(); for (i = 0; i < len; i += 4) { I915_WRITE(VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), *data); data++; @@ -360,7 +353,6 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, /* Write every possible data byte to force correct ECC calculation. 
*/ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) I915_WRITE(VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), 0); - mmiowb(); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -406,7 +398,6 @@ static void hsw_write_infoframe(struct intel_encoder *encoder, val &= ~hsw_infoframe_enable(type); I915_WRITE(ctl_reg, val); - mmiowb(); for (i = 0; i < len; i += 4) { I915_WRITE(hsw_dip_data_reg(dev_priv, cpu_transcoder, type, i >> 2), *data); @@ -416,7 +407,6 @@ static void hsw_write_infoframe(struct intel_encoder *encoder, for (; i < data_size; i += 4) I915_WRITE(hsw_dip_data_reg(dev_priv, cpu_transcoder, type, i >> 2), 0); - mmiowb(); val |= hsw_infoframe_enable(type); I915_WRITE(ctl_reg, val); diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 67d4a7d4acc8a..88d132edc4e30 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -156,7 +156,6 @@ static u16 tx4939ide_check_error_ints(ide_hwif_t *hwif) u16 sysctl = tx4939ide_readw(base, TX4939IDE_Sys_Ctl); tx4939ide_writew(sysctl | 0x4000, base, TX4939IDE_Sys_Ctl); - mmiowb(); /* wait 12GBUSCLK (typ. 60ns @ GBUS200MHz, max 270ns) */ ndelay(270); tx4939ide_writew(sysctl, base, TX4939IDE_Sys_Ctl); @@ -396,7 +395,6 @@ static void tx4939ide_init_hwif(ide_hwif_t *hwif) /* Soft Reset */ tx4939ide_writew(0x8000, base, TX4939IDE_Sys_Ctl); - mmiowb(); /* at least 20 GBUSCLK (typ. 100ns @ GBUS200MHz, max 450ns) */ ndelay(450); tx4939ide_writew(0x0000, base, TX4939IDE_Sys_Ctl); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 12e67a91e5781..8f270459b63ef 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8365,7 +8365,6 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd) struct hfi1_devdata *dd = rcd->dd; u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg); - mmiowb(); write_csr(dd, addr, rcd->imask); /* force the above write on the chip and get a value back */ (void)read_csr(dd, addr); @@ -11803,12 +11802,10 @@ void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, << RCV_EGR_INDEX_HEAD_HEAD_SHIFT; write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg); } - mmiowb(); reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) | (((u64)hd & RCV_HDR_HEAD_HEAD_MASK) << RCV_HDR_HEAD_HEAD_SHIFT); write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg); - mmiowb(); } u32 hdrqempty(struct hfi1_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index a1de566fe95e4..16ba9d52e1b9b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1578,7 +1578,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) sc_del_credit_return_intr(sc); trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); if (needint) { - mmiowb(); sc_return_credits(sc); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 97515c340134c..c8555f7704d84 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1750,8 +1750,6 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, writel(val, hcr + 5); - mmiowb(); - return 0; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 429a59c5801cc..9426936460f8e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3744,12 +3744,6 @@ out: writel_relaxed(qp->doorbell_qpn, to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); - /* - * Make sure doorbells 
don't leak out of SQ spinlock - * and reach the HCA out of order. - */ - mmiowb(); - stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1); qp->sq_next_wqe = ind; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cd006da1daef..b680be1f3f473 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5123,7 +5123,6 @@ out: /* Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. */ - mmiowb(); bf->offset ^= bf->buf_size; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 83aa47eb81a92..bdf5ed38de220 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -292,12 +292,6 @@ static int mthca_cmd_post(struct mthca_dev *dev, err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier, op_modifier, op, token, event); - /* - * Make sure that our HCR writes don't get mixed in with - * writes from another CPU starting a FW command. - */ - mmiowb(); - mutex_unlock(&dev->cmd.hcr_mutex); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index a6531ffe29a6f..877a6daffa98a 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -211,11 +211,6 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1, dev->kar + MTHCA_CQ_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); - /* - * Make sure doorbells don't leak out of CQ spinlock - * and reach the HCA out of order: - */ - mmiowb(); } } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 7a5b25d13faa8..d65b189f20ead 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1809,11 +1809,6 @@ out: (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); - /* - * Make sure doorbells don't leak out of SQ spinlock - * and reach the HCA out of order: - */ - mmiowb(); } qp->sq.next_ind = ind; @@ -1924,12 +1919,6 @@ out: qp->rq.next_ind = ind; qp->rq.head += nreq; - /* - * Make sure doorbells don't leak out of RQ spinlock and reach - * the HCA out of order: - */ - mmiowb(); - spin_unlock_irqrestore(&qp->rq.lock, flags); return err; } @@ -2164,12 +2153,6 @@ out: MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } - /* - * Make sure doorbells don't leak out of SQ spinlock and reach - * the HCA out of order: - */ - mmiowb(); - spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 06b9203855122..a85935ccce881 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -570,12 +570,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } - /* - * Make sure doorbells don't leak out of SRQ spinlock and - * reach the HCA out of order: - */ - mmiowb(); - spin_unlock_irqrestore(&srq->lock, flags); return err; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4dab2b5ffb0ea..8686a98e113d3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -773,9 +773,6 @@ static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags) cq->db.data.agg_flags = flags; cq->db.data.value = 
cpu_to_le32(cons); writeq(cq->db.raw, cq->db_addr); - - /* Make sure write would stick */ - mmiowb(); } int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) @@ -2084,8 +2081,6 @@ static int qedr_update_qp_state(struct qedr_dev *dev, if (rdma_protocol_roce(&dev->ibdev, 1)) { writel(qp->rq.db_data.raw, qp->rq.db); - /* Make sure write takes effect */ - mmiowb(); } break; case QED_ROCE_QP_STATE_ERR: @@ -3502,9 +3497,6 @@ int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, smp_wmb(); writel(qp->sq.db_data.raw, qp->sq.db); - /* Make sure write sticks */ - mmiowb(); - spin_unlock_irqrestore(&qp->q_lock, flags); return rc; @@ -3695,12 +3687,8 @@ int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, writel(qp->rq.db_data.raw, qp->rq.db); - /* Make sure write sticks */ - mmiowb(); - if (rdma_protocol_iwarp(&dev->ibdev, 1)) { writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); - mmiowb(); } wr = wr->next; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index cdbf707fa2671..531d8a1db2c3e 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1884,7 +1884,6 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, qib_write_kreg(dd, kr_scratch, 0xfeeddeaf); writel(pa, tidp32); qib_write_kreg(dd, kr_scratch, 0xdeadbeef); - mmiowb(); spin_unlock_irqrestore(tidlockp, flags); } @@ -1928,7 +1927,6 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, pa |= 2 << 29; } writel(pa, tidp32); - mmiowb(); } @@ -2053,9 +2051,7 @@ static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd, { if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); - mmiowb(); qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); - mmiowb(); } static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 9fde45538f6e9..ea3ddb05cbadf 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -2175,7 +2175,6 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, pa = chippa; } writeq(pa, tidptr); - mmiowb(); } /** @@ -2704,9 +2703,7 @@ static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd, { if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); - mmiowb(); qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); - mmiowb(); } static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 17d6b24b34736..ac6a84f11ad08 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3793,7 +3793,6 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, pa = chippa; } writeq(pa, tidptr); - mmiowb(); } /** @@ -4440,10 +4439,8 @@ static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd, adjust_rcv_timeout(rcd, npkts); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); - mmiowb(); qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); - mmiowb(); } static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 12caf3db8c349..4f4a09c2dbcd3 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ 
b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1068,7 +1068,6 @@ static int qib_sd_setvals(struct qib_devdata *dd) for (idx = 0; idx < NUM_DDS_REGS; ++idx) { data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT; writeq(data, iaddr + idx); - mmiowb(); qib_read_kreg32(dd, kr_scratch); dds_reg_map >>= 4; for (midx = 0; midx < DDS_ROWS; ++midx) { @@ -1076,7 +1075,6 @@ static int qib_sd_setvals(struct qib_devdata *dd) data = dds_init_vals[midx].reg_vals[idx]; writeq(data, daddr); - mmiowb(); qib_read_kreg32(dd, kr_scratch); } /* End inner for (vals for this reg, each row) */ } /* end outer for (regs to be stored) */ @@ -1098,13 +1096,11 @@ static int qib_sd_setvals(struct qib_devdata *dd) didx = idx + min_idx; /* Store the next RXEQ register address */ writeq(rxeq_init_vals[idx].rdesc, iaddr + didx); - mmiowb(); qib_read_kreg32(dd, kr_scratch); /* Iterate through RXEQ values */ for (vidx = 0; vidx < 4; vidx++) { data = rxeq_init_vals[idx].rdata[vidx]; writeq(data, taddr + (vidx << 6) + idx); - mmiowb(); qib_read_kreg32(dd, kr_scratch); } } /* end outer for (Reg-writes for RXEQ) */ diff --git a/drivers/media/pci/dt3155/dt3155.c b/drivers/media/pci/dt3155/dt3155.c index 17d69bd5d7f11..49677ee889e3f 100644 --- a/drivers/media/pci/dt3155/dt3155.c +++ b/drivers/media/pci/dt3155/dt3155.c @@ -46,7 +46,6 @@ static int read_i2c_reg(void __iomem *addr, u8 index, u8 *data) u32 tmp = index; iowrite32((tmp << 17) | IIC_READ, addr + IIC_CSR2); - mmiowb(); udelay(45); /* wait at least 43 usec for NEW_CYCLE to clear */ if (ioread32(addr + IIC_CSR2) & NEW_CYCLE) return -EIO; /* error: NEW_CYCLE not cleared */ @@ -77,7 +76,6 @@ static int write_i2c_reg(void __iomem *addr, u8 index, u8 data) u32 tmp = index; iowrite32((tmp << 17) | IIC_WRITE | data, addr + IIC_CSR2); - mmiowb(); udelay(65); /* wait at least 63 usec for NEW_CYCLE to clear */ if (ioread32(addr + IIC_CSR2) & NEW_CYCLE) return -EIO; /* error: NEW_CYCLE not cleared */ @@ -104,7 +102,6 @@ static void write_i2c_reg_nowait(void __iomem *addr, u8 index, u8 data) u32 tmp = index; iowrite32((tmp << 17) | IIC_WRITE | data, addr + IIC_CSR2); - mmiowb(); } /** @@ -264,7 +261,6 @@ static irqreturn_t dt3155_irq_handler_even(int irq, void *dev_id) FLD_DN_ODD | FLD_DN_EVEN | CAP_CONT_EVEN | CAP_CONT_ODD, ipd->regs + CSR1); - mmiowb(); } spin_lock(&ipd->lock); @@ -282,7 +278,6 @@ static irqreturn_t dt3155_irq_handler_even(int irq, void *dev_id) iowrite32(dma_addr + ipd->width, ipd->regs + ODD_DMA_START); iowrite32(ipd->width, ipd->regs + EVEN_DMA_STRIDE); iowrite32(ipd->width, ipd->regs + ODD_DMA_STRIDE); - mmiowb(); } /* enable interrupts, clear all irq flags */ @@ -437,12 +432,10 @@ static int dt3155_init_board(struct dt3155_priv *pd) /* resetting the adapter */ iowrite32(ADDR_ERR_ODD | ADDR_ERR_EVEN | FLD_CRPT_ODD | FLD_CRPT_EVEN | FLD_DN_ODD | FLD_DN_EVEN, pd->regs + CSR1); - mmiowb(); msleep(20); /* initializing adapter registers */ iowrite32(FIFO_EN | SRST, pd->regs + CSR1); - mmiowb(); iowrite32(0xEEEEEE01, pd->regs + EVEN_PIXEL_FMT); iowrite32(0xEEEEEE01, pd->regs + ODD_PIXEL_FMT); iowrite32(0x00000020, pd->regs + FIFO_TRIGER); @@ -454,7 +447,6 @@ static int dt3155_init_board(struct dt3155_priv *pd) iowrite32(0, pd->regs + MASK_LENGTH); iowrite32(0x0005007C, pd->regs + FIFO_FLAG_CNT); iowrite32(0x01010101, pd->regs + IIC_CLK_DUR); - mmiowb(); /* verifying that we have a DT3155 board (not just a SAA7116 chip) */ read_i2c_reg(pd->regs, DT_ID, &tmp); diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index bcdca9fbef51c..e3a5af65dbce7 
100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -644,7 +644,6 @@ static int jmb38x_ms_reset(struct jmb38x_ms_host *host) writel(HOST_CONTROL_RESET_REQ | HOST_CONTROL_CLOCK_EN | readl(host->addr + HOST_CONTROL), host->addr + HOST_CONTROL); - mmiowb(); for (cnt = 0; cnt < 20; ++cnt) { if (!(HOST_CONTROL_RESET_REQ @@ -659,7 +658,6 @@ reset_next: writel(HOST_CONTROL_RESET | HOST_CONTROL_CLOCK_EN | readl(host->addr + HOST_CONTROL), host->addr + HOST_CONTROL); - mmiowb(); for (cnt = 0; cnt < 20; ++cnt) { if (!(HOST_CONTROL_RESET @@ -672,7 +670,6 @@ reset_next: return -EIO; reset_ok: - mmiowb(); writel(INT_STATUS_ALL, host->addr + INT_SIGNAL_ENABLE); writel(INT_STATUS_ALL, host->addr + INT_STATUS_ENABLE); return 0; @@ -1009,7 +1006,6 @@ static void jmb38x_ms_remove(struct pci_dev *dev) tasklet_kill(&host->notify); writel(0, host->addr + INT_SIGNAL_ENABLE); writel(0, host->addr + INT_STATUS_ENABLE); - mmiowb(); dev_dbg(&jm->pdev->dev, "interrupts off\n"); spin_lock_irqsave(&host->lock, flags); if (host->req) { diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c index ec08322781708..9d0445a567dba 100644 --- a/drivers/misc/ioc4.c +++ b/drivers/misc/ioc4.c @@ -156,7 +156,6 @@ ioc4_clock_calibrate(struct ioc4_driver_data *idd) /* Reset to power-on state */ writel(0, &idd->idd_misc_regs->int_out.raw); - mmiowb(); /* Set up square wave */ int_out.raw = 0; @@ -164,7 +163,6 @@ ioc4_clock_calibrate(struct ioc4_driver_data *idd) int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE; int_out.fields.diag = 0; writel(int_out.raw, &idd->idd_misc_regs->int_out.raw); - mmiowb(); /* Check square wave period averaged over some number of cycles */ start = ktime_get_ns(); diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 3fbbadfa2ae15..8a47a6fc3fc77 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -350,9 +350,6 @@ static void mei_me_hw_reset_release(struct mei_device *dev) hcsr |= H_IG; hcsr &= ~H_RST; mei_hcsr_set(dev, hcsr); - - /* complete this write before we set host ready on another CPU */ - mmiowb(); } /** diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c index 9ac95b48ef924..cc729f7ab32e5 100644 --- a/drivers/misc/tifm_7xx1.c +++ b/drivers/misc/tifm_7xx1.c @@ -403,7 +403,6 @@ static void tifm_7xx1_remove(struct pci_dev *dev) fm->eject = tifm_7xx1_dummy_eject; fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif; writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); - mmiowb(); free_irq(dev->irq, fm); tifm_remove_adapter(fm); diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c index 82a97866e0cf4..546b1fc30e7d8 100644 --- a/drivers/mmc/host/alcor.c +++ b/drivers/mmc/host/alcor.c @@ -967,7 +967,6 @@ static void alcor_timeout_timer(struct work_struct *work) alcor_request_complete(host, 0); } - mmiowb(); mutex_unlock(&host->cmd_mutex); } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index a8141ff9be038..42e1bad024f46 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1807,7 +1807,6 @@ void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) sdhci_send_command(host, mrq->cmd); } - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } EXPORT_SYMBOL_GPL(sdhci_request); @@ -2010,8 +2009,6 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) */ if (host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS) sdhci_do_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); - - mmiowb(); } EXPORT_SYMBOL_GPL(sdhci_set_ios); @@ -2105,7 +2102,6 @@ static void 
sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable) sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); - mmiowb(); } } @@ -2353,7 +2349,6 @@ void sdhci_send_tuning(struct sdhci_host *host, u32 opcode) host->tuning_done = 0; - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); /* Wait for Buffer Read Ready interrupt */ @@ -2705,7 +2700,6 @@ static bool sdhci_request_done(struct sdhci_host *host) host->mrqs_done[i] = NULL; - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); mmc_request_done(host->mmc, mrq); @@ -2739,7 +2733,6 @@ static void sdhci_timeout_timer(struct timer_list *t) sdhci_finish_mrq(host, host->cmd->mrq); } - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -2770,7 +2763,6 @@ static void sdhci_timeout_data_timer(struct timer_list *t) } } - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -3251,7 +3243,6 @@ int sdhci_resume_host(struct sdhci_host *host) mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (host->mmc->pm_flags & MMC_PM_KEEP_POWER)); - mmiowb(); } if (host->irq_wake_enabled) { @@ -3391,7 +3382,6 @@ void sdhci_cqe_enable(struct mmc_host *mmc) mmc_hostname(mmc), host->ier, sdhci_readl(host, SDHCI_INT_STATUS)); - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } EXPORT_SYMBOL_GPL(sdhci_cqe_enable); @@ -3416,7 +3406,6 @@ void sdhci_cqe_disable(struct mmc_host *mmc, bool recovery) mmc_hostname(mmc), host->ier, sdhci_readl(host, SDHCI_INT_STATUS)); - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } EXPORT_SYMBOL_GPL(sdhci_cqe_disable); @@ -4255,8 +4244,6 @@ int __sdhci_add_host(struct sdhci_host *host) goto unirq; } - mmiowb(); - ret = mmc_add_host(mmc); if (ret) goto unled; diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c index b6644ce296b20..35dd34b82a4d0 100644 --- a/drivers/mmc/host/tifm_sd.c +++ b/drivers/mmc/host/tifm_sd.c @@ -889,7 +889,6 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) struct tifm_dev *sock = host->dev; writel(0, sock->addr + SOCK_MMCSD_INT_ENABLE); - mmiowb(); host->clk_div = 61; host->clk_freq = 20000000; writel(TIFM_MMCSD_RESET, sock->addr + SOCK_MMCSD_SYSTEM_CONTROL); @@ -940,7 +939,6 @@ static int tifm_sd_initialize_host(struct tifm_sd *host) writel(TIFM_MMCSD_CERR | TIFM_MMCSD_BRS | TIFM_MMCSD_EOC | TIFM_MMCSD_ERRMASK, sock->addr + SOCK_MMCSD_INT_ENABLE); - mmiowb(); return 0; } @@ -1005,7 +1003,6 @@ static void tifm_sd_remove(struct tifm_dev *sock) spin_lock_irqsave(&sock->lock, flags); host->eject = 1; writel(0, sock->addr + SOCK_MMCSD_INT_ENABLE); - mmiowb(); spin_unlock_irqrestore(&sock->lock, flags); tasklet_kill(&host->finish_tasklet); diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c index 32c4211506fc8..412395ac29352 100644 --- a/drivers/mmc/host/via-sdmmc.c +++ b/drivers/mmc/host/via-sdmmc.c @@ -686,7 +686,6 @@ static void via_sdc_request(struct mmc_host *mmc, struct mmc_request *mrq) via_sdc_send_command(host, mrq->cmd); } - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -711,7 +710,6 @@ static void via_sdc_set_power(struct via_crdr_mmc_host *host, gatt &= ~VIA_CRDR_PCICLKGATT_PAD_PWRON; writeb(gatt, host->pcictrl_mmiobase + VIA_CRDR_PCICLKGATT); - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); via_pwron_sleep(host); @@ -770,7 +768,6 @@ static void via_sdc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (readb(addrbase + VIA_CRDR_PCISDCCLK) != clock) writeb(clock, addrbase + VIA_CRDR_PCISDCCLK); - mmiowb(); 
spin_unlock_irqrestore(&host->lock, flags); if (ios->power_mode != MMC_POWER_OFF) @@ -830,7 +827,6 @@ static void via_reset_pcictrl(struct via_crdr_mmc_host *host) via_restore_pcictrlreg(host); via_restore_sdcreg(host); - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); } @@ -925,7 +921,6 @@ static irqreturn_t via_sdc_isr(int irq, void *dev_id) result = IRQ_HANDLED; - mmiowb(); out: spin_unlock(&sdhost->lock); @@ -960,7 +955,6 @@ static void via_sdc_timeout(struct timer_list *t) } } - mmiowb(); spin_unlock_irqrestore(&sdhost->lock, flags); } @@ -1012,7 +1006,6 @@ static void via_sdc_card_detect(struct work_struct *work) tasklet_schedule(&host->finish_tasklet); } - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); via_reset_pcictrl(host); @@ -1020,7 +1013,6 @@ static void via_sdc_card_detect(struct work_struct *work) spin_lock_irqsave(&host->lock, flags); } - mmiowb(); spin_unlock_irqrestore(&host->lock, flags); via_print_pcictrl(host); @@ -1188,7 +1180,6 @@ static void via_sd_remove(struct pci_dev *pcidev) /* Disable generating further interrupts */ writeb(0x0, sdhost->pcictrl_mmiobase + VIA_CRDR_PCIINTCTRL); - mmiowb(); if (sdhost->mrq) { pr_err("%s: Controller removed during " @@ -1197,7 +1188,6 @@ static void via_sd_remove(struct pci_dev *pcidev) /* make sure all DMA is stopped */ writel(VIA_CRDR_DMACTRL_SFTRST, sdhost->ddma_mmiobase + VIA_CRDR_DMACTRL); - mmiowb(); sdhost->mrq->cmd->error = -ENOMEDIUM; if (sdhost->mrq->stop) sdhost->mrq->stop->error = -ENOMEDIUM; diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c index 86456216fb93f..7b99831aa0462 100644 --- a/drivers/mtd/nand/raw/r852.c +++ b/drivers/mtd/nand/raw/r852.c @@ -45,7 +45,6 @@ static inline void r852_write_reg(struct r852_device *dev, int address, uint8_t value) { writeb(value, dev->mmio + address); - mmiowb(); } @@ -61,7 +60,6 @@ static inline void r852_write_reg_dword(struct r852_device *dev, int address, uint32_t value) { writel(cpu_to_le32(value), dev->mmio + address); - mmiowb(); } /* returns pointer to our private structure */ diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c index ddf0420c09976..97978227aa558 100644 --- a/drivers/mtd/nand/raw/txx9ndfmc.c +++ b/drivers/mtd/nand/raw/txx9ndfmc.c @@ -159,7 +159,6 @@ static void txx9ndfmc_cmd_ctrl(struct nand_chip *chip, int cmd, if ((ctrl & NAND_CTRL_CHANGE) && cmd == NAND_CMD_NONE) txx9ndfmc_write(dev, 0, TXX9_NDFDTR); } - mmiowb(); } static int txx9ndfmc_dev_ready(struct nand_chip *chip) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 47e5984f16fbd..3155f7fa83eb6 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -613,7 +613,6 @@ static irqreturn_t greth_interrupt(int irq, void *dev_id) napi_schedule(&greth->napi); } - mmiowb(); spin_unlock(&greth->devlock); return retval; diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index 16477aa6d61f6..4f7e792e50e9f 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -345,8 +345,6 @@ static void slic_set_rx_mode(struct net_device *dev) if (sdev->promisc != set_promisc) { sdev->promisc = set_promisc; slic_configure_rcv(sdev); - /* make sure writes to receiver cant leak out of the lock */ - mmiowb(); } spin_unlock_bh(&sdev->link_lock); } @@ -1461,8 +1459,6 @@ static netdev_tx_t slic_xmit(struct sk_buff *skb, struct net_device *dev) if (slic_get_free_tx_descs(txq) < 
SLIC_MAX_REQ_TX_DESCS) netif_stop_queue(dev); - /* make sure writes to io-memory cant leak out of tx queue lock */ - mmiowb(); return NETDEV_TX_OK; drop_skb: diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index b17d435de09fe..05798aa5bb73a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2016,7 +2016,6 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) mb(); writel_relaxed((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); - mmiowb(); } int ena_com_dev_reset(struct ena_com_dev *ena_dev, diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 9e07b469066a4..f7583c5d95097 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2439,7 +2439,6 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, atl1_tx_map(adapter, skb, ptpd); atl1_tx_queue(adapter, count, ptpd); atl1_update_mailbox(adapter); - mmiowb(); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index d99317b3d891b..1474cac7e8924 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -908,7 +908,6 @@ static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, ATL2_WRITE_REGW(&adapter->hw, REG_MB_TXD_WR_IDX, (adapter->txd_write_ptr >> 2)); - mmiowb(); dev_consume_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index d63371d70bcef..dfdd14eadd572 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3305,8 +3305,6 @@ next_rx: BNX2_WR(bp, rxr->rx_bseq_addr, rxr->rx_prod_bseq); - mmiowb(); - return rx_pkt; } @@ -6723,8 +6721,6 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) BNX2_WR16(bp, txr->tx_bidx_addr, prod); BNX2_WR(bp, txr->tx_bseq_addr, txr->tx_prod_bseq); - mmiowb(); - txr->tx_prod = prod; if (unlikely(bnx2_tx_avail(bp, txr) <= MAX_SKB_FRAGS)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ecb1bd7eb5080..0c8f5b546c6f4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4166,8 +4166,6 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw); - mmiowb(); - txdata->tx_bd_prod += nbd; if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_DESC_PER_TX_PKT)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 1ed068509337c..2d57af9c061cc 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -527,8 +527,6 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp, REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4, ((u32 *)&rx_prods)[i]); - mmiowb(); - DP(NETIF_MSG_RX_STATUS, "queue[%d]: wrote bd_prod %u cqe_prod %u sge_prod %u\n", fp->index, bd_prod, rx_comp_prod, rx_sge_prod); @@ -653,7 +651,6 @@ static inline void bnx2x_igu_ack_sb_gen(struct bnx2x *bp, u8 igu_sb_id, REG_WR(bp, igu_addr, cmd_data.sb_id_and_flags); /* Make sure that ACK is written */ - mmiowb(); barrier(); } @@ -674,7 +671,6 @@ static inline void bnx2x_hc_ack_sb(struct bnx2x *bp, u8 sb_id, REG_WR(bp, hc_addr, (*(u32 *)&igu_ack)); /* Make sure that ACK is 
written */ - mmiowb(); barrier(); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 749d0ef443717..0745cccd416db 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2623,7 +2623,6 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) wmb(); DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw); - mmiowb(); barrier(); num_pkts++; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e46786a56b0ca..3716c828ff5d9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -869,9 +869,6 @@ static void bnx2x_hc_int_disable(struct bnx2x *bp) "write %x to HC %d (addr 0x%x)\n", val, port, addr); - /* flush all outstanding writes */ - mmiowb(); - REG_WR(bp, addr, val); if (REG_RD(bp, addr) != val) BNX2X_ERR("BUG! Proper val not read from IGU!\n"); @@ -887,9 +884,6 @@ static void bnx2x_igu_int_disable(struct bnx2x *bp) DP(NETIF_MSG_IFDOWN, "write %x to IGU\n", val); - /* flush all outstanding writes */ - mmiowb(); - REG_WR(bp, IGU_REG_PF_CONFIGURATION, val); if (REG_RD(bp, IGU_REG_PF_CONFIGURATION) != val) BNX2X_ERR("BUG! Proper val not read from IGU!\n"); @@ -1595,7 +1589,6 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp) /* * Ensure that HC_CONFIG is written before leading/trailing edge config */ - mmiowb(); barrier(); if (!CHIP_IS_E1(bp)) { @@ -1611,9 +1604,6 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp) REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val); REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val); } - - /* Make sure that interrupts are indeed enabled from here on */ - mmiowb(); } static void bnx2x_igu_int_enable(struct bnx2x *bp) @@ -1674,9 +1664,6 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp) REG_WR(bp, IGU_REG_TRAILING_EDGE_LATCH, val); REG_WR(bp, IGU_REG_LEADING_EDGE_LATCH, val); - - /* Make sure that interrupts are indeed enabled from here on */ - mmiowb(); } void bnx2x_int_enable(struct bnx2x *bp) @@ -3833,7 +3820,6 @@ static void bnx2x_sp_prod_update(struct bnx2x *bp) REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func), bp->spq_prod_idx); - mmiowb(); } /** @@ -5244,7 +5230,6 @@ static void bnx2x_update_eq_prod(struct bnx2x *bp, u16 prod) { /* No memory barriers */ storm_memset_eq_prod(bp, prod, BP_FUNC(bp)); - mmiowb(); } static int bnx2x_cnic_handle_cfc_del(struct bnx2x *bp, u32 cid, @@ -6513,7 +6498,6 @@ void bnx2x_nic_init_cnic(struct bnx2x *bp) /* flush all */ mb(); - mmiowb(); } void bnx2x_pre_irq_nic_init(struct bnx2x *bp) @@ -6553,7 +6537,6 @@ void bnx2x_post_irq_nic_init(struct bnx2x *bp, u32 load_code) /* flush all before enabling interrupts */ mb(); - mmiowb(); bnx2x_int_enable(bp); @@ -7775,12 +7758,10 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, bool is_pf) DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n", data, igu_addr_data); REG_WR(bp, igu_addr_data, data); - mmiowb(); barrier(); DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n", ctl, igu_addr_ctl); REG_WR(bp, igu_addr_ctl, ctl); - mmiowb(); barrier(); /* wait for clean up to finish */ @@ -9550,7 +9531,6 @@ static void bnx2x_set_234_gates(struct bnx2x *bp, bool close) DP(NETIF_MSG_HW | NETIF_MSG_IFUP, "%s gates #2, #3 and #4\n", close ? 
"closing" : "opening"); - mmiowb(); } #define SHARED_MF_CLP_MAGIC 0x80000000 /* `magic' bit */ @@ -9674,7 +9654,6 @@ static void bnx2x_pxp_prep(struct bnx2x *bp) if (!CHIP_IS_E1(bp)) { REG_WR(bp, PXP2_REG_RD_START_INIT, 0); REG_WR(bp, PXP2_REG_RQ_RBC_DONE, 0); - mmiowb(); } } @@ -9774,16 +9753,13 @@ static void bnx2x_process_kill_chip_reset(struct bnx2x *bp, bool global) reset_mask1 & (~not_reset_mask1)); barrier(); - mmiowb(); REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, reset_mask2 & (~stay_reset2)); barrier(); - mmiowb(); REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1); - mmiowb(); } /** @@ -9867,9 +9843,6 @@ static int bnx2x_process_kill(struct bnx2x *bp, bool global) REG_WR(bp, MISC_REG_UNPREPARED, 0); barrier(); - /* Make sure all is written to the chip before the reset */ - mmiowb(); - /* Wait for 1ms to empty GLUE and PCI-E core queues, * PSWHST, GRC and PSWRD Tetris buffer. */ @@ -14828,7 +14801,6 @@ static int bnx2x_drv_ctl(struct net_device *dev, struct drv_ctl_info *ctl) if (rc) break; - mmiowb(); barrier(); /* Start accepting on iSCSI L2 ring */ @@ -14863,7 +14835,6 @@ static int bnx2x_drv_ctl(struct net_device *dev, struct drv_ctl_info *ctl) if (!bnx2x_wait_sp_comp(bp, sp_bits)) BNX2X_ERR("rx_mode completion timed out!\n"); - mmiowb(); barrier(); /* Unset iSCSI L2 MAC */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 7b22a6d8514c6..80d250a6d048e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -5039,7 +5039,6 @@ static inline int bnx2x_q_init(struct bnx2x *bp, /* As no ramrod is sent, complete the command immediately */ o->complete_cmd(bp, o, BNX2X_Q_CMD_INIT); - mmiowb(); smp_mb(); return 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index c97b642e65379..0edbb0a768472 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -100,13 +100,11 @@ static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf, DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n", cmd_data.sb_id_and_flags, igu_addr_data); REG_WR(bp, igu_addr_data, cmd_data.sb_id_and_flags); - mmiowb(); barrier(); DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n", ctl, igu_addr_ctl); REG_WR(bp, igu_addr_ctl, ctl); - mmiowb(); barrier(); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index a9bdc21873d32..672b57f0b84d9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -172,8 +172,6 @@ static int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping) /* Trigger the PF FW */ writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid); - mmiowb(); - /* Wait for PF to complete */ while ((tout >= 0) && (!*done)) { msleep(interval); @@ -1179,7 +1177,6 @@ static void bnx2x_vf_mbx_resp_send_msg(struct bnx2x *bp, /* ack the FW */ storm_memset_vf_mbx_ack(bp, vf->abs_vfid); - mmiowb(); /* copy the response header including status-done field, * must be last dmae, must be after FW is acked @@ -2174,7 +2171,6 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf, */ storm_memset_vf_mbx_ack(bp, vf->abs_vfid); /* Firmware ack should be written before unlocking channel */ - mmiowb(); bnx2x_unlock_vf_pf_channel(bp, vf, 
mbx->first_tlv.tl.type); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0bb9d7b3a2b62..b8b68d408ad05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -556,8 +556,6 @@ normal_tx: tx_done: - mmiowb(); - if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) { if (skb->xmit_more && !tx_buf->is_push) bnxt_db_write(bp, &txr->tx_db, prod); @@ -2123,7 +2121,6 @@ static int bnxt_poll(struct napi_struct *napi, int budget) &dim_sample); net_dim(&cpr->dim, dim_sample); } - mmiowb(); return work_done; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 328373e0578ff..821bccc0915c6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -1073,7 +1073,6 @@ static void tg3_int_reenable(struct tg3_napi *tnapi) struct tg3 *tp = tnapi->tp; tw32_mailbox(tnapi->int_mbox, tnapi->last_tag << 24); - mmiowb(); /* When doing tagged status, this work check is unnecessary. * The last_tag we write above tells the chip which piece of @@ -6999,7 +6998,6 @@ next_pkt_nopost: tw32_rx_mbox(TG3_RX_JMB_PROD_IDX_REG, tpr->rx_jmb_prod_idx); } - mmiowb(); } else if (work_mask) { /* rx_std_buffers[] and rx_jmb_buffers[] entries must be * updated before the producer indices can be updated. @@ -7210,8 +7208,6 @@ static int tg3_poll_work(struct tg3_napi *tnapi, int work_done, int budget) tw32_rx_mbox(TG3_RX_JMB_PROD_IDX_REG, dpr->rx_jmb_prod_idx); - mmiowb(); - if (err) tw32_f(HOSTCC_MODE, tp->coal_now); } @@ -7278,7 +7274,6 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget) HOSTCC_MODE_ENABLE | tnapi->coal_now); } - mmiowb(); break; } } @@ -8159,7 +8154,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb->xmit_more || netif_xmit_stopped(txq)) { /* Packets are ready, update Tx producer idx on card. */ tw32_tx_mbox(tnapi->prodmbox, entry); - mmiowb(); } return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index 2df7440f58df5..39643be8c30a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -38,9 +38,6 @@ int lio_cn6xxx_soft_reset(struct octeon_device *oct) lio_pci_readq(oct, CN6XXX_CIU_SOFT_RST); lio_pci_writeq(oct, 1, CN6XXX_CIU_SOFT_RST); - /* make sure that the reset is written before starting timer */ - mmiowb(); - /* Wait for 10ms as Octeon resets. */ mdelay(100); @@ -487,9 +484,6 @@ void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, /* Disable Interrupts */ writeq(0, cn6xxx->intr_enb_reg64); - - /* make sure interrupts are really disabled */ - mmiowb(); } static void lio_cn6xxx_get_pcie_qlmport(struct octeon_device *oct) @@ -555,10 +549,6 @@ static int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct) value &= ~(1 << oq_no); octeon_write_csr(oct, reg, value); - /* Ensure that the enable register is written. 
- */ - mmiowb(); - spin_unlock(&cn6xxx->lock_for_droq_int_enb_reg); } } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index ce8c3f8186668..934115d18488e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1449,7 +1449,6 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) iq->pkt_in_done -= iq->pkts_processed; iq->pkts_processed = 0; /* this write needs to be flushed before we release the lock */ - mmiowb(); spin_unlock_bh(&iq->lock); oct = iq->oct_dev; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index a0c099f715245..017169023cca3 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -513,8 +513,6 @@ int octeon_retry_droq_refill(struct octeon_droq *droq) */ wmb(); writel(desc_refilled, droq->pkts_credit_reg); - /* make sure mmio write completes */ - mmiowb(); if (pkts_credit + desc_refilled >= CN23XX_SLI_DEF_BP) reschedule = 0; @@ -712,8 +710,6 @@ octeon_droq_fast_process_packets(struct octeon_device *oct, */ wmb(); writel(desc_refilled, droq->pkts_credit_reg); - /* make sure mmio write completes */ - mmiowb(); } } } /* for (each packet)... */ diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index c6f4cbda040f1..fcf20a8f92d94 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -278,7 +278,6 @@ ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq) if (atomic_read(&oct->status) == OCT_DEV_RUNNING) { writel(iq->fill_cnt, iq->doorbell_reg); /* make sure doorbell write goes through */ - mmiowb(); iq->fill_cnt = 0; iq->last_db_time = jiffies; return; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 8fe9af0e2ab77..466bf1ea186d2 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3270,11 +3270,6 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, if (!skb->xmit_more || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { writel(tx_ring->next_to_use, hw->hw_addr + tx_ring->tdt); - /* we need this if more than one processor can write to - * our tail at a time, it synchronizes IO on IA64/Altix - * systems - */ - mmiowb(); } } else { dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 7acc61e4f6456..022c3ac0e40f2 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3816,7 +3816,6 @@ static void e1000_flush_tx_ring(struct e1000_adapter *adapter) if (tx_ring->next_to_use == tx_ring->count) tx_ring->next_to_use = 0; ew32(TDT(0), tx_ring->next_to_use); - mmiowb(); usleep_range(200, 250); } @@ -5904,12 +5903,6 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, tx_ring->next_to_use); else writel(tx_ring->next_to_use, tx_ring->tail); - - /* we need this if more than one processor can write - * to our tail at a time, it synchronizes IO on - *IA64/Altix systems - */ - mmiowb(); } } else { dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 
5d4f1761dc0c2..8de77155f2e7e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -321,8 +321,6 @@ static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev) pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask); err_mask |= PCI_ERR_UNC_COMP_ABORT; pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask); - - mmiowb(); } int fm10k_iov_resume(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 5a0419421511f..1f48298f01e65 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1037,11 +1037,6 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring, /* notify HW of packet */ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 6c97667d20eff..ffb611bbedfa4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3471,11 +3471,6 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, /* notify HW of packet */ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 9b4d7cec2e18a..6bfef82e7607b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -2360,11 +2360,6 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, /* notify HW of packet */ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c289d97f477d5..1af21bbe180ee 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1356,11 +1356,6 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, /* notify HW of packet */ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 69b230c53fed5..09ba944967423 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6028,11 +6028,6 @@ static int igb_tx_map(struct igb_ring *tx_ring, if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return 0; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 
4eab83faec620..34cd30d7162f9 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2279,10 +2279,6 @@ static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter, tx_ring->buffer_info[first].next_to_watch = tx_desc; tx_ring->next_to_use = i; writel(i, adapter->hw.hw_addr + tx_ring->tail); - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 87a11879bf2dc..f8d692f6aa4fe 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -892,11 +892,6 @@ static int igc_tx_map(struct igc_ring *tx_ring, if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e100054a3765e..99e23cf6a73ab 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8299,11 +8299,6 @@ static int ixgbe_tx_map(struct ixgbe_ring *tx_ring, if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } return 0; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 8b3495ee2b6eb..49486c10ef813 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1139,9 +1139,6 @@ static inline void sky2_put_idx(struct sky2_hw *hw, unsigned q, u16 idx) /* Make sure write' to descriptors are complete before we tell hardware */ wmb(); sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx); - - /* Synchronize I/O on since next processor may write to tail */ - mmiowb(); } @@ -1354,7 +1351,6 @@ stopped: /* reset the Rx prefetch unit */ sky2_write32(hw, Y2_QADDR(rxq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET); - mmiowb(); } /* Clean out receive buffer area, assumes receiver hardware stopped */ diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index c81d15bf259c8..87e90b5d4d7d0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -129,10 +129,6 @@ static int mlx4_reset_slave(struct mlx4_dev *dev) comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; __raw_writel((__force u32)cpu_to_be32(comm_flags), (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); - /* Make sure that our comm channel write doesn't - * get mixed in with writes from another CPU. 
- */ - mmiowb(); end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; while (time_before(jiffies, end)) { diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a5d5d6fc1da00..c678344d22a2c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -281,7 +281,6 @@ static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); - mmiowb(); mutex_unlock(&dev->persist->device_state_mutex); return 0; } @@ -496,12 +495,6 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, (op_modifier << HCR_OPMOD_SHIFT) | op), hcr + 6); - /* - * Make sure that our HCR writes don't get mixed in with - * writes from another CPU starting a FW command. - */ - mmiowb(); - cmd->toggle = cmd->toggle ^ 1; ret = 0; @@ -2206,7 +2199,6 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, } __raw_writel((__force u32) cpu_to_be32(reply), &priv->mfunc.comm[slave].slave_read); - mmiowb(); return; @@ -2410,7 +2402,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) &priv->mfunc.comm[i].slave_write); __raw_writel((__force u32) 0, &priv->mfunc.comm[i].slave_read); - mmiowb(); for (port = 1; port <= MLX4_MAX_PORTS; port++) { struct mlx4_vport_state *admin_vport; struct mlx4_vport_state *oper_vport; @@ -2576,10 +2567,6 @@ void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; __raw_writel((__force u32)cpu_to_be32(slave_read), &priv->mfunc.comm[slave].slave_read); - /* Make sure that our comm channel write doesn't - * get mixed in with writes from another CPU. 
- */ - mmiowb(); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index be48c6440251f..c087d1014b097 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -917,7 +917,6 @@ static void cmd_work_handler(struct work_struct *work) mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); - mmiowb(); /* if not in polling don't use ent after this point */ if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { poll_timeout(ent); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index e0340f778d8fb..d8b7fba96d58e 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1439,7 +1439,6 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index) tx->queue_active = 0; put_be32(htonl(1), tx->send_stop); mb(); - mmiowb(); } __netif_tx_unlock(dev_queue); } @@ -2861,7 +2860,6 @@ again: tx->queue_active = 1; put_be32(htonl(1), tx->send_go); mb(); - mmiowb(); } tx->pkt_start++; if ((avail - count) < MXGEFW_MAX_SEND_DESC) { diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index feda9644289d4..3b2ae1a216786 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -4153,8 +4153,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) writeq(val64, &tx_fifo->List_Control); - mmiowb(); - put_off++; if (put_off == fifo->tx_curr_put_info.fifo_len + 1) put_off = 0; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index b877acec5cde6..1d334f2e0a568 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -1826,7 +1826,6 @@ static int vxge_poll_msix(struct napi_struct *napi, int budget) vxge_hw_channel_msix_unmask( (struct __vxge_hw_channel *)ring->handle, ring->rx_vector_no); - mmiowb(); } /* We are copying and returning the local variable, in case if after @@ -2234,8 +2233,6 @@ static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id) vxge_hw_channel_msix_unmask((struct __vxge_hw_channel *)fifo->handle, fifo->tx_vector_no); - mmiowb(); - return IRQ_HANDLED; } @@ -2272,14 +2269,12 @@ vxge_alarm_msix_handle(int irq, void *dev_id) */ vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id); vxge_hw_vpath_msix_clear(vdev->vpaths[i].handle, msix_id); - mmiowb(); status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle, vdev->exec_mode); if (status == VXGE_HW_OK) { vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle, msix_id); - mmiowb(); continue; } vxge_debug_intr(VXGE_ERR, diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c index 59e77e3086bb3..709d20d9938fb 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c @@ -1399,11 +1399,7 @@ static void __vxge_hw_non_offload_db_post(struct __vxge_hw_fifo *fifo, VXGE_HW_NODBW_GET_NO_SNOOP(no_snoop), &fifo->nofl_db->control_0); - mmiowb(); - writeq(txdl_ptr, &fifo->nofl_db->txdl_ptr); - - mmiowb(); } /** diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index e23980e301b6a..69e6a90edf2fe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ 
b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -774,18 +774,12 @@ static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn, { u16 rc = 0, index; - /* Make certain HW write took affect */ - mmiowb(); - index = le16_to_cpu(p_sb_desc->sb_attn->sb_index); if (p_sb_desc->index != index) { p_sb_desc->index = index; rc = QED_SB_ATT_IDX; } - /* Make certain we got a consistent view with HW */ - mmiowb(); - return rc; } @@ -1170,7 +1164,6 @@ static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn, /* Both segments (interrupts & acks) are written to same place address; * Need to guarantee all commands will be received (in-order) by HW. */ - mmiowb(); barrier(); } @@ -1805,9 +1798,6 @@ static void qed_int_igu_enable_attn(struct qed_hwfn *p_hwfn, qed_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0xfff); qed_wr(p_hwfn, p_ptt, IGU_REG_ATTENTION_ENABLE, 0xfff); - /* Flush the writes to IGU */ - mmiowb(); - /* Unmask AEU signals toward IGU */ qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff); } @@ -1871,9 +1861,6 @@ static void qed_int_igu_cleanup_sb(struct qed_hwfn *p_hwfn, qed_wr(p_hwfn, p_ptt, IGU_REG_COMMAND_REG_CTRL, cmd_ctrl); - /* Flush the write to IGU */ - mmiowb(); - /* calculate where to read the status bit from */ sb_bit = 1 << (igu_sb_id % 32); sb_bit_addr = igu_sb_id / 32 * sizeof(u32); diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 79b311b86f664..f5f3c03b9dd24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -341,9 +341,6 @@ void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod) USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id); REG_WR16(p_hwfn, addr, prod); - - /* keep prod updates ordered */ - mmiowb(); } int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index b4c8949933f16..4555c0b161efb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1526,14 +1526,6 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, barrier(); writel(txq->tx_db.raw, txq->doorbell_addr); - /* mmiowb is needed to synchronize doorbell writes from more than one - * processor. It guarantees that the write arrives to the device before - * the queue lock is released and another start_xmit is called (possibly - * on another CPU). Without this barrier, the next doorbell can bypass - * this doorbell. This is applicable to IA64/Altix systems. - */ - mmiowb(); - for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) { if (qede_txq_has_work(txq)) break; diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 31b046e24565f..6f7e3622c6b41 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -580,14 +580,6 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq) internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods), (u32 *)&rx_prods); - - /* mmiowb is needed to synchronize doorbell writes from more than one - * processor. It guarantees that the write arrives to the device before - * the napi lock is released and another qede_poll is called (possibly - * on another CPU). Without this barrier, the next doorbell can bypass - * this doorbell. This is applicable to IA64/Altix systems. 
- */ - mmiowb(); } static void qede_get_rxhash(struct sk_buff *skb, u8 bitfields, __le32 rss_hash) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b61b88cbc0c7d..457444894d807 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -1858,7 +1858,6 @@ static void ql_update_small_bufq_prod_index(struct ql3_adapter *qdev) wmb(); writel_relaxed(qdev->small_buf_q_producer_index, &port_regs->CommonRegs.rxSmallQProducerIndex); - mmiowb(); } } diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 3e71b65a95465..ad7c5eb8a3b6b 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -2181,7 +2181,6 @@ static inline void ql_write32(const struct ql_adapter *qdev, int reg, u32 val) static inline void ql_write_db_reg(u32 val, void __iomem *addr) { writel(val, addr); - mmiowb(); } /* diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 07e1c623048e5..6cae33072496c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2695,7 +2695,6 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev) wmb(); ql_write_db_reg_relaxed(tx_ring->prod_idx, tx_ring->prod_idx_db_reg); - mmiowb(); netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev, "tx queued, slot %d, len %d\n", tx_ring->prod_idx, skb->len); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8154b38c08f71..316b47741d3f7 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -728,7 +728,6 @@ static irqreturn_t ravb_emac_interrupt(int irq, void *dev_id) spin_lock(&priv->lock); ravb_emac_interrupt_unlocked(ndev); - mmiowb(); spin_unlock(&priv->lock); return IRQ_HANDLED; } @@ -848,7 +847,6 @@ static irqreturn_t ravb_interrupt(int irq, void *dev_id) result = IRQ_HANDLED; } - mmiowb(); spin_unlock(&priv->lock); return result; } @@ -881,7 +879,6 @@ static irqreturn_t ravb_multi_interrupt(int irq, void *dev_id) result = IRQ_HANDLED; } - mmiowb(); spin_unlock(&priv->lock); return result; } @@ -898,7 +895,6 @@ static irqreturn_t ravb_dma_interrupt(int irq, void *dev_id, int q) if (ravb_queue_interrupt(ndev, q)) result = IRQ_HANDLED; - mmiowb(); spin_unlock(&priv->lock); return result; } @@ -943,7 +939,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); ravb_tx_free(ndev, q, true); netif_wake_subqueue(ndev, q); - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); } } @@ -959,7 +954,6 @@ static int ravb_poll(struct napi_struct *napi, int budget) ravb_write(ndev, mask, RIE0); ravb_write(ndev, mask, TIE); } - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); /* Receive error message handling */ @@ -1008,7 +1002,6 @@ static void ravb_adjust_link(struct net_device *ndev) if (priv->no_avb_link && phydev->link) ravb_rcv_snd_enable(ndev); - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); if (new_state && netif_msg_link(priv)) @@ -1601,7 +1594,6 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) netif_stop_subqueue(ndev, q); exit: - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); return NETDEV_TX_OK; @@ -1673,7 +1665,6 @@ static void ravb_set_rx_mode(struct net_device *ndev) spin_lock_irqsave(&priv->lock, flags); ravb_modify(ndev, ECMR, ECMR_PRM, 
ndev->flags & IFF_PROMISC ? ECMR_PRM : 0); - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); } diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index dce2a40a31e33..9a42580693cb1 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -196,7 +196,6 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp, ravb_write(ndev, GIE_PTCS, GIE); else ravb_write(ndev, GID_PTCD, GID); - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); return 0; @@ -259,7 +258,6 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp, else ravb_write(ndev, GID_PTMD0, GID); } - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); return error; @@ -331,7 +329,6 @@ void ravb_ptp_init(struct net_device *ndev, struct platform_device *pdev) spin_lock_irqsave(&priv->lock, flags); ravb_wait(ndev, GCCR, GCCR_TCR, GCCR_TCR_NOREQ); ravb_modify(ndev, GCCR, GCCR_TCSS, GCCR_TCSS_ADJGPTP); - mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); priv->ptp.clock = ptp_clock_register(&priv->ptp.info, &pdev->dev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e33af371b169e..ed30aebdb941a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2010,7 +2010,6 @@ static void sh_eth_adjust_link(struct net_device *ndev) if ((mdp->cd->no_psr || mdp->no_ether_link) && phydev->link) sh_eth_rcv_snd_enable(ndev); - mmiowb(); spin_unlock_irqrestore(&mdp->lock, flags); if (new_state && netif_msg_link(mdp)) diff --git a/drivers/net/ethernet/sfc/falcon/io.h b/drivers/net/ethernet/sfc/falcon/io.h index 7085ee1d5e2b6..c3577643fbdae 100644 --- a/drivers/net/ethernet/sfc/falcon/io.h +++ b/drivers/net/ethernet/sfc/falcon/io.h @@ -108,7 +108,6 @@ static inline void ef4_writeo(struct ef4_nic *efx, const ef4_oword_t *value, _ef4_writed(efx, value->u32[2], reg + 8); _ef4_writed(efx, value->u32[3], reg + 12); #endif - mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } @@ -130,7 +129,6 @@ static inline void ef4_sram_writeq(struct ef4_nic *efx, void __iomem *membase, __raw_writel((__force u32)value->u32[0], membase + addr); __raw_writel((__force u32)value->u32[1], membase + addr + 4); #endif - mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } diff --git a/drivers/net/ethernet/sfc/io.h b/drivers/net/ethernet/sfc/io.h index 89563170af521..2774a10f44e95 100644 --- a/drivers/net/ethernet/sfc/io.h +++ b/drivers/net/ethernet/sfc/io.h @@ -120,7 +120,6 @@ static inline void efx_writeo(struct efx_nic *efx, const efx_oword_t *value, _efx_writed(efx, value->u32[2], reg + 8); _efx_writed(efx, value->u32[3], reg + 12); #endif - mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } @@ -142,7 +141,6 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase, __raw_writel((__force u32)value->u32[0], membase + addr); __raw_writel((__force u32)value->u32[1], membase + addr + 4); #endif - mmiowb(); spin_unlock_irqrestore(&efx->biu_lock, flags); } diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index c07fd594fe71a..db5dc8ce0aff8 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -361,7 +361,6 @@ static void sc92031_disable_interrupts(struct net_device *dev) /* stop interrupts */ iowrite32(0, port_base + IntrMask); _sc92031_dummy_read(port_base); - mmiowb(); /* wait for any concurrent interrupt/tasklet to finish */ synchronize_irq(priv->pdev->irq); @@ -379,7 
+378,6 @@ static void sc92031_enable_interrupts(struct net_device *dev) wmb(); iowrite32(IntrBits, port_base + IntrMask); - mmiowb(); } static void _sc92031_disable_tx_rx(struct net_device *dev) @@ -867,7 +865,6 @@ out: rmb(); iowrite32(intr_mask, port_base + IntrMask); - mmiowb(); spin_unlock(&priv->lock); } @@ -901,7 +898,6 @@ out_none: rmb(); iowrite32(intr_mask, port_base + IntrMask); - mmiowb(); return IRQ_NONE; } @@ -978,7 +974,6 @@ static netdev_tx_t sc92031_start_xmit(struct sk_buff *skb, iowrite32(priv->tx_bufs_dma_addr + entry * TX_BUF_SIZE, port_base + TxAddr0 + entry * 4); iowrite32(tx_status, port_base + TxStatus0 + entry * 4); - mmiowb(); if (priv->tx_head - priv->tx_tail >= NUM_TX_DESC) netif_stop_queue(dev); @@ -1024,7 +1019,6 @@ static int sc92031_open(struct net_device *dev) spin_lock_bh(&priv->lock); _sc92031_reset(dev); - mmiowb(); spin_unlock_bh(&priv->lock); sc92031_enable_interrupts(dev); @@ -1060,7 +1054,6 @@ static int sc92031_stop(struct net_device *dev) _sc92031_disable_tx_rx(dev); _sc92031_tx_clear(dev); - mmiowb(); spin_unlock_bh(&priv->lock); @@ -1081,7 +1074,6 @@ static void sc92031_set_multicast_list(struct net_device *dev) _sc92031_set_mar(dev); _sc92031_set_rx_config(dev); - mmiowb(); spin_unlock_bh(&priv->lock); } @@ -1098,7 +1090,6 @@ static void sc92031_tx_timeout(struct net_device *dev) priv->tx_timeouts++; _sc92031_reset(dev); - mmiowb(); spin_unlock(&priv->lock); @@ -1140,7 +1131,6 @@ sc92031_ethtool_get_link_ksettings(struct net_device *dev, output_status = _sc92031_mii_read(port_base, MII_OutputStatus); _sc92031_mii_scan(port_base); - mmiowb(); spin_unlock_bh(&priv->lock); @@ -1311,7 +1301,6 @@ static int sc92031_ethtool_set_wol(struct net_device *dev, priv->pm_config = pm_config; iowrite32(pm_config, port_base + PMConfig); - mmiowb(); spin_unlock_bh(&priv->lock); @@ -1337,7 +1326,6 @@ static int sc92031_ethtool_nway_reset(struct net_device *dev) out: _sc92031_mii_scan(port_base); - mmiowb(); spin_unlock_bh(&priv->lock); @@ -1530,7 +1518,6 @@ static int sc92031_suspend(struct pci_dev *pdev, pm_message_t state) _sc92031_disable_tx_rx(dev); _sc92031_tx_clear(dev); - mmiowb(); spin_unlock_bh(&priv->lock); @@ -1555,7 +1542,6 @@ static int sc92031_resume(struct pci_dev *pdev) spin_lock_bh(&priv->lock); _sc92031_reset(dev); - mmiowb(); spin_unlock_bh(&priv->lock); sc92031_enable_interrupts(dev); diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 33949248c829e..ab55416a10fab 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -571,7 +571,6 @@ static void rhine_ack_events(struct rhine_private *rp, u32 mask) if (rp->quirks & rqStatusWBRace) iowrite8(mask >> 16, ioaddr + IntrStatus2); iowrite16(mask, ioaddr + IntrStatus); - mmiowb(); } /* @@ -863,7 +862,6 @@ static int rhine_napipoll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); iowrite16(enable_mask, ioaddr + IntrEnable); - mmiowb(); } return work_done; } @@ -1893,7 +1891,6 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, static void rhine_irq_disable(struct rhine_private *rp) { iowrite16(0x0000, rp->base + IntrEnable); - mmiowb(); } /* The interrupt handler does all of the Rx thread work and cleans up diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index d8ba512f166ad..1713c2d2dccf2 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -219,7 +219,6 @@ static inline int 
__w5100_write_direct(struct net_device *ndev, u32 addr, static inline int w5100_write_direct(struct net_device *ndev, u32 addr, u8 data) { __w5100_write_direct(ndev, addr, data); - mmiowb(); return 0; } @@ -236,7 +235,6 @@ static int w5100_write16_direct(struct net_device *ndev, u32 addr, u16 data) { __w5100_write_direct(ndev, addr, data >> 8); __w5100_write_direct(ndev, addr + 1, data); - mmiowb(); return 0; } @@ -260,8 +258,6 @@ static int w5100_writebulk_direct(struct net_device *ndev, u32 addr, for (i = 0; i < len; i++, addr++) __w5100_write_direct(ndev, addr, *buf++); - mmiowb(); - return 0; } @@ -375,7 +371,6 @@ static int w5100_readbulk_indirect(struct net_device *ndev, u32 addr, u8 *buf, for (i = 0; i < len; i++) *buf++ = w5100_read_direct(ndev, W5100_IDM_DR); - mmiowb(); spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); return 0; @@ -394,7 +389,6 @@ static int w5100_writebulk_indirect(struct net_device *ndev, u32 addr, for (i = 0; i < len; i++) __w5100_write_direct(ndev, W5100_IDM_DR, *buf++); - mmiowb(); spin_unlock_irqrestore(&mmio_priv->reg_lock, flags); return 0; diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index f9da5d6172e38..3f03eecc04798 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -141,7 +141,6 @@ static u16 w5300_read_indirect(struct w5300_priv *priv, u16 addr) spin_lock_irqsave(&priv->reg_lock, flags); w5300_write_direct(priv, W5300_IDM_AR, addr); - mmiowb(); data = w5300_read_direct(priv, W5300_IDM_DR); spin_unlock_irqrestore(&priv->reg_lock, flags); @@ -154,9 +153,7 @@ static void w5300_write_indirect(struct w5300_priv *priv, u16 addr, u16 data) spin_lock_irqsave(&priv->reg_lock, flags); w5300_write_direct(priv, W5300_IDM_AR, addr); - mmiowb(); w5300_write_direct(priv, W5300_IDM_DR, data); - mmiowb(); spin_unlock_irqrestore(&priv->reg_lock, flags); } @@ -192,7 +189,6 @@ static int w5300_command(struct w5300_priv *priv, u16 cmd) unsigned long timeout = jiffies + msecs_to_jiffies(100); w5300_write(priv, W5300_S0_CR, cmd); - mmiowb(); while (w5300_read(priv, W5300_S0_CR) != 0) { if (time_after(jiffies, timeout)) @@ -241,18 +237,15 @@ static void w5300_write_macaddr(struct w5300_priv *priv) w5300_write(priv, W5300_SHARH, ndev->dev_addr[4] << 8 | ndev->dev_addr[5]); - mmiowb(); } static void w5300_hw_reset(struct w5300_priv *priv) { w5300_write_direct(priv, W5300_MR, MR_RST); - mmiowb(); mdelay(5); w5300_write_direct(priv, W5300_MR, priv->indirect ? MR_WDF(7) | MR_PB | MR_IND : MR_WDF(7) | MR_PB); - mmiowb(); w5300_write(priv, W5300_IMR, 0); w5300_write_macaddr(priv); @@ -264,24 +257,20 @@ static void w5300_hw_reset(struct w5300_priv *priv) w5300_write32(priv, W5300_TMSRL, 64 << 24); w5300_write32(priv, W5300_TMSRH, 0); w5300_write(priv, W5300_MTYPE, 0x00ff); - mmiowb(); } static void w5300_hw_start(struct w5300_priv *priv) { w5300_write(priv, W5300_S0_MR, priv->promisc ? 
S0_MR_MACRAW : S0_MR_MACRAW_MF); - mmiowb(); w5300_command(priv, S0_CR_OPEN); w5300_write(priv, W5300_S0_IMR, S0_IR_RECV | S0_IR_SENDOK); w5300_write(priv, W5300_IMR, IR_S0); - mmiowb(); } static void w5300_hw_close(struct w5300_priv *priv) { w5300_write(priv, W5300_IMR, 0); - mmiowb(); w5300_command(priv, S0_CR_CLOSE); } @@ -372,7 +361,6 @@ static netdev_tx_t w5300_start_tx(struct sk_buff *skb, struct net_device *ndev) netif_stop_queue(ndev); w5300_write_frame(priv, skb->data, skb->len); - mmiowb(); ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; dev_kfree_skb(skb); @@ -419,7 +407,6 @@ static int w5300_napi_poll(struct napi_struct *napi, int budget) if (rx_count < budget) { napi_complete_done(napi, rx_count); w5300_write(priv, W5300_IMR, IR_S0); - mmiowb(); } return rx_count; @@ -434,7 +421,6 @@ static irqreturn_t w5300_interrupt(int irq, void *ndev_instance) if (!ir) return IRQ_NONE; w5300_write(priv, W5300_S0_IR, ir); - mmiowb(); if (ir & S0_IR_SENDOK) { netif_dbg(priv, tx_done, ndev, "tx done\n"); @@ -444,7 +430,6 @@ static irqreturn_t w5300_interrupt(int irq, void *ndev_instance) if (ir & S0_IR_RECV) { if (napi_schedule_prep(&priv->napi)) { w5300_write(priv, W5300_IMR, 0); - mmiowb(); __napi_schedule(&priv->napi); } } diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index a2351ef45ae0e..65a4c142640d0 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -837,7 +837,6 @@ ath5k_txbuf_setup(struct ath5k_hw *ah, struct ath5k_buf *bf, txq->link = &ds->ds_link; ath5k_hw_start_tx_dma(ah, txq->qnum); - mmiowb(); spin_unlock_bh(&txq->lock); return 0; @@ -2174,7 +2173,6 @@ ath5k_beacon_config(struct ath5k_hw *ah) } ath5k_hw_set_imr(ah, ah->imask); - mmiowb(); spin_unlock_bh(&ah->block); } @@ -2779,7 +2777,6 @@ int ath5k_start(struct ieee80211_hw *hw) ret = 0; done: - mmiowb(); mutex_unlock(&ah->lock); set_bit(ATH_STAT_STARTED, ah->status); @@ -2839,7 +2836,6 @@ void ath5k_stop(struct ieee80211_hw *hw) "putting device to sleep\n"); } - mmiowb(); mutex_unlock(&ah->lock); ath5k_stop_tasklets(ah); diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 16e052d02c940..5e866a193ed04 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -263,7 +263,6 @@ ath5k_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, memcpy(common->curbssid, bss_conf->bssid, ETH_ALEN); common->curaid = 0; ath5k_hw_set_bssid(ah); - mmiowb(); } if (changes & BSS_CHANGED_BEACON_INT) @@ -528,7 +527,6 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, ret = -EINVAL; } - mmiowb(); mutex_unlock(&ah->lock); return ret; } diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index 74be3c809225a..4c7980f845912 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -485,7 +485,6 @@ static void b43_ram_write(struct b43_wldev *dev, u16 offset, u32 val) val = swab32(val); b43_write32(dev, B43_MMIO_RAM_CONTROL, offset); - mmiowb(); b43_write32(dev, B43_MMIO_RAM_DATA, val); } @@ -656,9 +655,7 @@ static void b43_tsf_write_locked(struct b43_wldev *dev, u64 tsf) /* The hardware guarantees us an atomic write, if we * write the low register first. 
*/ b43_write32(dev, B43_MMIO_REV3PLUS_TSF_LOW, low); - mmiowb(); b43_write32(dev, B43_MMIO_REV3PLUS_TSF_HIGH, high); - mmiowb(); } void b43_tsf_write(struct b43_wldev *dev, u64 tsf) @@ -1822,11 +1819,9 @@ static void b43_beacon_update_trigger_work(struct work_struct *work) if (b43_bus_host_is_sdio(dev->dev)) { /* wl->mutex is enough. */ b43_do_beacon_update_trigger_work(dev); - mmiowb(); } else { spin_lock_irq(&wl->hardirq_lock); b43_do_beacon_update_trigger_work(dev); - mmiowb(); spin_unlock_irq(&wl->hardirq_lock); } } @@ -2078,7 +2073,6 @@ static irqreturn_t b43_interrupt_thread_handler(int irq, void *dev_id) mutex_lock(&dev->wl->mutex); b43_do_interrupt_thread(dev); - mmiowb(); mutex_unlock(&dev->wl->mutex); return IRQ_HANDLED; @@ -2143,7 +2137,6 @@ static irqreturn_t b43_interrupt_handler(int irq, void *dev_id) spin_lock(&dev->wl->hardirq_lock); ret = b43_do_interrupt(dev); - mmiowb(); spin_unlock(&dev->wl->hardirq_lock); return ret; diff --git a/drivers/net/wireless/broadcom/b43/sysfs.c b/drivers/net/wireless/broadcom/b43/sysfs.c index 3190493bd07f4..93d03b673670d 100644 --- a/drivers/net/wireless/broadcom/b43/sysfs.c +++ b/drivers/net/wireless/broadcom/b43/sysfs.c @@ -129,7 +129,6 @@ static ssize_t b43_attr_interfmode_store(struct device *dev, } else err = -ENOSYS; - mmiowb(); mutex_unlock(&wldev->wl->mutex); return err ? err : count; diff --git a/drivers/net/wireless/broadcom/b43legacy/ilt.c b/drivers/net/wireless/broadcom/b43legacy/ilt.c index ee5682e54204d..6d15fb4d30c6a 100644 --- a/drivers/net/wireless/broadcom/b43legacy/ilt.c +++ b/drivers/net/wireless/broadcom/b43legacy/ilt.c @@ -315,14 +315,12 @@ const u16 b43legacy_ilt_sigmasqr2[B43legacy_ILT_SIGMASQR_SIZE] = { void b43legacy_ilt_write(struct b43legacy_wldev *dev, u16 offset, u16 val) { b43legacy_phy_write(dev, B43legacy_PHY_ILT_G_CTRL, offset); - mmiowb(); b43legacy_phy_write(dev, B43legacy_PHY_ILT_G_DATA1, val); } void b43legacy_ilt_write32(struct b43legacy_wldev *dev, u16 offset, u32 val) { b43legacy_phy_write(dev, B43legacy_PHY_ILT_G_CTRL, offset); - mmiowb(); b43legacy_phy_write(dev, B43legacy_PHY_ILT_G_DATA2, (val & 0xFFFF0000) >> 16); b43legacy_phy_write(dev, B43legacy_PHY_ILT_G_DATA1, diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 55f411925960e..c777efc6dc131 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -264,7 +264,6 @@ static void b43legacy_ram_write(struct b43legacy_wldev *dev, u16 offset, val = swab32(val); b43legacy_write32(dev, B43legacy_MMIO_RAM_CONTROL, offset); - mmiowb(); b43legacy_write32(dev, B43legacy_MMIO_RAM_DATA, val); } @@ -341,14 +340,11 @@ void b43legacy_shm_write32(struct b43legacy_wldev *dev, if (offset & 0x0003) { /* Unaligned access */ b43legacy_shm_control_word(dev, routing, offset >> 2); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_SHM_DATA_UNALIGNED, (value >> 16) & 0xffff); - mmiowb(); b43legacy_shm_control_word(dev, routing, (offset >> 2) + 1); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_SHM_DATA, value & 0xffff); return; @@ -356,7 +352,6 @@ void b43legacy_shm_write32(struct b43legacy_wldev *dev, offset >>= 2; } b43legacy_shm_control_word(dev, routing, offset); - mmiowb(); b43legacy_write32(dev, B43legacy_MMIO_SHM_DATA, value); } @@ -368,7 +363,6 @@ void b43legacy_shm_write16(struct b43legacy_wldev *dev, u16 routing, u16 offset, if (offset & 0x0003) { /* Unaligned access */ b43legacy_shm_control_word(dev, routing, offset >> 2); - mmiowb(); 
b43legacy_write16(dev, B43legacy_MMIO_SHM_DATA_UNALIGNED, value); @@ -377,7 +371,6 @@ void b43legacy_shm_write16(struct b43legacy_wldev *dev, u16 routing, u16 offset, offset >>= 2; } b43legacy_shm_control_word(dev, routing, offset); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_SHM_DATA, value); } @@ -471,7 +464,6 @@ static void b43legacy_time_lock(struct b43legacy_wldev *dev) status = b43legacy_read32(dev, B43legacy_MMIO_MACCTL); status |= B43legacy_MACCTL_TBTTHOLD; b43legacy_write32(dev, B43legacy_MMIO_MACCTL, status); - mmiowb(); } static void b43legacy_time_unlock(struct b43legacy_wldev *dev) @@ -494,10 +486,8 @@ static void b43legacy_tsf_write_locked(struct b43legacy_wldev *dev, u64 tsf) u32 hi = (tsf & 0xFFFFFFFF00000000ULL) >> 32; b43legacy_write32(dev, B43legacy_MMIO_REV3PLUS_TSF_LOW, 0); - mmiowb(); b43legacy_write32(dev, B43legacy_MMIO_REV3PLUS_TSF_HIGH, hi); - mmiowb(); b43legacy_write32(dev, B43legacy_MMIO_REV3PLUS_TSF_LOW, lo); } else { @@ -507,13 +497,9 @@ static void b43legacy_tsf_write_locked(struct b43legacy_wldev *dev, u64 tsf) u16 v3 = (tsf & 0xFFFF000000000000ULL) >> 48; b43legacy_write16(dev, B43legacy_MMIO_TSF_0, 0); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_TSF_3, v3); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_TSF_2, v2); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_TSF_1, v1); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_TSF_0, v0); } } @@ -1250,7 +1236,6 @@ static void b43legacy_beacon_update_trigger_work(struct work_struct *work) /* The handler might have updated the IRQ mask. */ b43legacy_write32(dev, B43legacy_MMIO_GEN_IRQ_MASK, dev->irq_mask); - mmiowb(); spin_unlock_irq(&wl->irq_lock); } mutex_unlock(&wl->mutex); @@ -1346,7 +1331,6 @@ static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) dma_reason[2], dma_reason[3], dma_reason[4], dma_reason[5]); b43legacy_controller_restart(dev, "DMA error"); - mmiowb(); spin_unlock_irqrestore(&dev->wl->irq_lock, flags); return; } @@ -1396,7 +1380,6 @@ static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) handle_irq_transmit_status(dev); b43legacy_write32(dev, B43legacy_MMIO_GEN_IRQ_MASK, dev->irq_mask); - mmiowb(); spin_unlock_irqrestore(&dev->wl->irq_lock, flags); } @@ -1488,7 +1471,6 @@ static irqreturn_t b43legacy_interrupt_handler(int irq, void *dev_id) dev->irq_reason = reason; tasklet_schedule(&dev->isr_tasklet); out: - mmiowb(); spin_unlock(&dev->wl->irq_lock); return ret; @@ -2781,7 +2763,6 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, spin_lock_irqsave(&wl->irq_lock, flags); b43legacy_write32(dev, B43legacy_MMIO_GEN_IRQ_MASK, dev->irq_mask); - mmiowb(); spin_unlock_irqrestore(&wl->irq_lock, flags); out_unlock_mutex: mutex_unlock(&wl->mutex); @@ -2900,7 +2881,6 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw, spin_lock_irqsave(&wl->irq_lock, flags); b43legacy_write32(dev, B43legacy_MMIO_GEN_IRQ_MASK, dev->irq_mask); /* XXX: why? 
*/ - mmiowb(); spin_unlock_irqrestore(&wl->irq_lock, flags); out_unlock_mutex: mutex_unlock(&wl->mutex); diff --git a/drivers/net/wireless/broadcom/b43legacy/phy.c b/drivers/net/wireless/broadcom/b43legacy/phy.c index 995c7d0c212ae..f949766d27ca1 100644 --- a/drivers/net/wireless/broadcom/b43legacy/phy.c +++ b/drivers/net/wireless/broadcom/b43legacy/phy.c @@ -134,7 +134,6 @@ u16 b43legacy_phy_read(struct b43legacy_wldev *dev, u16 offset) void b43legacy_phy_write(struct b43legacy_wldev *dev, u16 offset, u16 val) { b43legacy_write16(dev, B43legacy_MMIO_PHY_CONTROL, offset); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_PHY_DATA, val); } diff --git a/drivers/net/wireless/broadcom/b43legacy/pio.h b/drivers/net/wireless/broadcom/b43legacy/pio.h index 1cd1b9ca5e9c4..08cd02282beb4 100644 --- a/drivers/net/wireless/broadcom/b43legacy/pio.h +++ b/drivers/net/wireless/broadcom/b43legacy/pio.h @@ -92,7 +92,6 @@ void b43legacy_pio_write(struct b43legacy_pioqueue *queue, u16 offset, u16 value) { b43legacy_write16(queue->dev, queue->mmio_base + offset, value); - mmiowb(); } diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c index eab1c93878468..c6db444ea07ef 100644 --- a/drivers/net/wireless/broadcom/b43legacy/radio.c +++ b/drivers/net/wireless/broadcom/b43legacy/radio.c @@ -95,7 +95,6 @@ void b43legacy_radio_lock(struct b43legacy_wldev *dev) B43legacy_WARN_ON(status & B43legacy_MACCTL_RADIOLOCK); status |= B43legacy_MACCTL_RADIOLOCK; b43legacy_write32(dev, B43legacy_MMIO_MACCTL, status); - mmiowb(); udelay(10); } @@ -108,7 +107,6 @@ void b43legacy_radio_unlock(struct b43legacy_wldev *dev) B43legacy_WARN_ON(!(status & B43legacy_MACCTL_RADIOLOCK)); status &= ~B43legacy_MACCTL_RADIOLOCK; b43legacy_write32(dev, B43legacy_MMIO_MACCTL, status); - mmiowb(); } u16 b43legacy_radio_read16(struct b43legacy_wldev *dev, u16 offset) @@ -141,7 +139,6 @@ u16 b43legacy_radio_read16(struct b43legacy_wldev *dev, u16 offset) void b43legacy_radio_write16(struct b43legacy_wldev *dev, u16 offset, u16 val) { b43legacy_write16(dev, B43legacy_MMIO_RADIO_CONTROL, offset); - mmiowb(); b43legacy_write16(dev, B43legacy_MMIO_RADIO_DATA_LOW, val); } @@ -333,7 +330,6 @@ u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev) void b43legacy_nrssi_hw_write(struct b43legacy_wldev *dev, u16 offset, s16 val) { b43legacy_phy_write(dev, B43legacy_PHY_NRSSILT_CTRL, offset); - mmiowb(); b43legacy_phy_write(dev, B43legacy_PHY_NRSSILT_DATA, (u16)val); } diff --git a/drivers/net/wireless/broadcom/b43legacy/sysfs.c b/drivers/net/wireless/broadcom/b43legacy/sysfs.c index 2a1da15c913b0..2db83eec7a110 100644 --- a/drivers/net/wireless/broadcom/b43legacy/sysfs.c +++ b/drivers/net/wireless/broadcom/b43legacy/sysfs.c @@ -143,7 +143,6 @@ static ssize_t b43legacy_attr_interfmode_store(struct device *dev, if (err) b43legacyerr(wldev->wl, "Interference Mitigation not " "supported by device\n"); - mmiowb(); spin_unlock_irqrestore(&wldev->wl->irq_lock, flags); mutex_unlock(&wldev->wl->mutex); diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index b079c64ca0147..986646af8dfd4 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -2030,13 +2030,6 @@ static inline void _il_release_nic_access(struct il_priv *il) { _il_clear_bit(il, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); - /* - * In above we are reading CSR_GP_CNTRL register, what will flush any - * previous writes, but 
still want write, which clear MAC_ACCESS_REQ - * bit, be performed on PCI bus before any other writes scheduled on - * different CPUs (after we drop reg_lock). - */ - mmiowb(); } static inline u32 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index fe8269d023def..abbfc9cc80fc3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2067,7 +2067,6 @@ static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans, * MAC_ACCESS_REQ bit to be performed before any other writes * scheduled on different CPUs (after we drop reg_lock). */ - mmiowb(); out: spin_unlock_irqrestore(&trans_pcie->reg_lock, *flags); } diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 1dede87dd54fa..dcf2346805350 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -358,8 +358,6 @@ static void idt_sw_write(struct idt_ntb_dev *ndev, iowrite32((u32)reg, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR); /* Put the new value of the register */ iowrite32(data, ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA); - /* Make sure the PCIe transactions are executed */ - mmiowb(); /* Unlock GASA registers operations */ spin_unlock_irqrestore(&ndev->gasa_lock, irqflags); } @@ -750,7 +748,6 @@ static void idt_ntb_local_link_enable(struct idt_ntb_dev *ndev) spin_lock_irqsave(&ndev->mtbl_lock, irqflags); idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part); idt_nt_write(ndev, IDT_NT_NTMTBLDATA, mtbldata); - mmiowb(); spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags); /* Notify the peers by setting and clearing the global signal bit */ @@ -778,7 +775,6 @@ static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev) spin_lock_irqsave(&ndev->mtbl_lock, irqflags); idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part); idt_nt_write(ndev, IDT_NT_NTMTBLDATA, 0); - mmiowb(); spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags); /* Notify the peers by setting and clearing the global signal bit */ @@ -1339,7 +1335,6 @@ static int idt_ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, idt_nt_write(ndev, IDT_NT_LUTLDATA, (u32)addr); idt_nt_write(ndev, IDT_NT_LUTMDATA, (u32)(addr >> 32)); idt_nt_write(ndev, IDT_NT_LUTUDATA, data); - mmiowb(); spin_unlock_irqrestore(&ndev->lut_lock, irqflags); /* Limit address isn't specified since size is fixed for LUT */ } @@ -1393,7 +1388,6 @@ static int idt_ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx, idt_nt_write(ndev, IDT_NT_LUTLDATA, 0); idt_nt_write(ndev, IDT_NT_LUTMDATA, 0); idt_nt_write(ndev, IDT_NT_LUTUDATA, 0); - mmiowb(); spin_unlock_irqrestore(&ndev->lut_lock, irqflags); } @@ -1812,7 +1806,6 @@ static int idt_ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, /* Set the route and send the data */ idt_sw_write(ndev, partdata_tbl[ndev->part].msgctl[midx], swpmsgctl); idt_nt_write(ndev, ntdata_tbl.msgs[midx].out, msg); - mmiowb(); /* Unlock the messages routing table */ spin_unlock_irqrestore(&ndev->msg_locks[midx], irqflags); diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 2a9d6b0d1f193..11a6cd3740049 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -284,11 +284,9 @@ static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, ntb_peer_spad_write(perf->ntb, peer->pidx, PERF_SPAD_HDATA(perf->gidx), upper_32_bits(data)); - mmiowb(); ntb_peer_spad_write(perf->ntb, peer->pidx, PERF_SPAD_CMD(perf->gidx), cmd); - mmiowb(); 
ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n", @@ -379,7 +377,6 @@ static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA, upper_32_bits(data)); - mmiowb(); /* This call shall trigger peer message event */ ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd); diff --git a/drivers/scsi/bfa/bfa.h b/drivers/scsi/bfa/bfa.h index 0e119d838e1b6..762cb77253b97 100644 --- a/drivers/scsi/bfa/bfa.h +++ b/drivers/scsi/bfa/bfa.h @@ -62,8 +62,7 @@ void bfa_isr_unhandled(struct bfa_s *bfa, struct bfi_msg_s *m); ((__bfa)->iocfc.cfg.drvcfg.num_reqq_elems - 1); \ writel((__bfa)->iocfc.req_cq_pi[__reqq], \ (__bfa)->iocfc.bfa_regs.cpe_q_pi[__reqq]); \ - mmiowb(); \ - } while (0) + } while (0) #define bfa_rspq_pi(__bfa, __rspq) \ (*(u32 *)((__bfa)->iocfc.rsp_cq_shadow_pi[__rspq].kva)) diff --git a/drivers/scsi/bfa/bfa_hw_cb.c b/drivers/scsi/bfa/bfa_hw_cb.c index c4a0c0eb88a57..4a0d881b26020 100644 --- a/drivers/scsi/bfa/bfa_hw_cb.c +++ b/drivers/scsi/bfa/bfa_hw_cb.c @@ -61,7 +61,6 @@ bfa_hwcb_rspq_ack_msix(struct bfa_s *bfa, int rspq, u32 ci) bfa_rspq_ci(bfa, rspq) = ci; writel(ci, bfa->iocfc.bfa_regs.rme_q_ci[rspq]); - mmiowb(); } void @@ -72,7 +71,6 @@ bfa_hwcb_rspq_ack(struct bfa_s *bfa, int rspq, u32 ci) bfa_rspq_ci(bfa, rspq) = ci; writel(ci, bfa->iocfc.bfa_regs.rme_q_ci[rspq]); - mmiowb(); } void diff --git a/drivers/scsi/bfa/bfa_hw_ct.c b/drivers/scsi/bfa/bfa_hw_ct.c index b0ff378dece2c..b7be5f4f02a5e 100644 --- a/drivers/scsi/bfa/bfa_hw_ct.c +++ b/drivers/scsi/bfa/bfa_hw_ct.c @@ -81,7 +81,6 @@ bfa_hwct_rspq_ack(struct bfa_s *bfa, int rspq, u32 ci) bfa_rspq_ci(bfa, rspq) = ci; writel(ci, bfa->iocfc.bfa_regs.rme_q_ci[rspq]); - mmiowb(); } /* @@ -94,7 +93,6 @@ bfa_hwct2_rspq_ack(struct bfa_s *bfa, int rspq, u32 ci) { bfa_rspq_ci(bfa, rspq) = ci; writel(ci, bfa->iocfc.bfa_regs.rme_q_ci[rspq]); - mmiowb(); } void diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 039328d9ef136..19734ec7f42e2 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -991,7 +991,6 @@ void bnx2fc_arm_cq(struct bnx2fc_rport *tgt) FCOE_CQE_TOGGLE_BIT_SHIFT); msg = *((u32 *)rx_db); writel(cpu_to_le32(msg), tgt->ctx_base); - mmiowb(); } @@ -1409,7 +1408,6 @@ void bnx2fc_ring_doorbell(struct bnx2fc_rport *tgt) (tgt->sq_curr_toggle_bit << 15); msg = *((u32 *)sq_db); writel(cpu_to_le32(msg), tgt->ctx_base); - mmiowb(); } diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index d56a78f411cd3..12666313b9379 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -253,7 +253,6 @@ void bnx2i_put_rq_buf(struct bnx2i_conn *bnx2i_conn, int count) writew(ep->qp.rq_prod_idx, ep->qp.ctx_base + CNIC_RECV_DOORBELL); } - mmiowb(); } @@ -279,8 +278,6 @@ static void bnx2i_ring_sq_dbell(struct bnx2i_conn *bnx2i_conn, int count) bnx2i_ring_577xx_doorbell(bnx2i_conn); } else writew(count, ep->qp.ctx_base + CNIC_SEND_DOORBELL); - - mmiowb(); } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 293f5cf524d7a..59a6546fd602a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -815,7 +815,6 @@ megasas_fire_cmd_skinny(struct megasas_instance *instance, &(regs)->inbound_high_queue_port); writel((lower_32_bits(frame_phys_addr) | (frame_count<<1))|1, &(regs)->inbound_low_queue_port); - mmiowb(); 
spin_unlock_irqrestore(&instance->hba_lock, flags); } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 1d17128030cdd..e35c2b64c1454 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -242,7 +242,6 @@ megasas_fire_cmd_fusion(struct megasas_instance *instance, &instance->reg_set->inbound_low_queue_port); writel(le32_to_cpu(req_desc->u.high), &instance->reg_set->inbound_high_queue_port); - mmiowb(); spin_unlock_irqrestore(&instance->hba_lock, flags); #endif } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 1d8c584ec1e91..f60b9e0a6ca61 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3333,7 +3333,6 @@ _base_mpi_ep_writeq(__u64 b, volatile void __iomem *addr, spin_lock_irqsave(writeq_lock, flags); __raw_writel((u32)(b), addr); __raw_writel((u32)(b >> 32), (addr + 4)); - mmiowb(); spin_unlock_irqrestore(writeq_lock, flags); } diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 6ca583bdde23c..53e8221f68165 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -807,7 +807,6 @@ void qedf_ring_doorbell(struct qedf_rport *fcport) writel(*(u32 *)&dbell, fcport->p_doorbell); /* Make sure SQ index is updated so f/w prcesses requests in order */ wmb(); - mmiowb(); } static void qedf_trace_io(struct qedf_rport *fcport, struct qedf_ioreq *io_req, diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index e2a995a6e8e73..f8f86774f77fd 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -985,7 +985,6 @@ static void qedi_ring_doorbell(struct qedi_conn *qedi_conn) * others they are two different assembly operations. */ wmb(); - mmiowb(); QEDI_INFO(&qedi_conn->qedi->dbg_ctx, QEDI_LOG_MP_REQ, "prod_idx=0x%x, fw_prod_idx=0x%x, cid=0x%x\n", qedi_conn->ep->sq_prod_idx, qedi_conn->ep->fw_sq_prod_idx, diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 6856dfdfa4738..93acbc5094f05 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -3004,8 +3004,6 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) sp->flags |= SRB_SENT; ha->actthreads++; WRT_REG_WORD(®->mailbox4, ha->req_ring_index); - /* Enforce mmio write ordering; see comment in qla1280_isp_cmd(). */ - mmiowb(); out: if (status) @@ -3254,8 +3252,6 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) sp->flags |= SRB_SENT; ha->actthreads++; WRT_REG_WORD(®->mailbox4, ha->req_ring_index); - /* Enforce mmio write ordering; see comment in qla1280_isp_cmd(). */ - mmiowb(); out: if (status) @@ -3379,7 +3375,6 @@ qla1280_isp_cmd(struct scsi_qla_host *ha) * See Documentation/driver-api/device-io.rst for more information. 
*/ WRT_REG_WORD(®->mailbox4, ha->req_ring_index); - mmiowb(); LEAVE("qla1280_isp_cmd"); } diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 84807a9b4b13a..da2d2ab8104d3 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -305,7 +305,6 @@ static int sprom_do_write(struct ssb_bus *bus, const u16 *sprom) else if (i % 2) pr_cont("."); writew(sprom[i], bus->mmio + bus->sprom_offset + (i * 2)); - mmiowb(); msleep(20); } err = pci_read_config_dword(pdev, SSB_SPROMCTL, &spromctl); diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index 567013f8a8be9..d7d730c245c56 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -338,7 +338,6 @@ static void ssb_pcmcia_write8(struct ssb_device *dev, u16 offset, u8 value) err = select_core_and_segment(dev, &offset); if (likely(!err)) writeb(value, bus->mmio + offset); - mmiowb(); spin_unlock_irqrestore(&bus->bar_lock, flags); } @@ -352,7 +351,6 @@ static void ssb_pcmcia_write16(struct ssb_device *dev, u16 offset, u16 value) err = select_core_and_segment(dev, &offset); if (likely(!err)) writew(value, bus->mmio + offset); - mmiowb(); spin_unlock_irqrestore(&bus->bar_lock, flags); } @@ -368,7 +366,6 @@ static void ssb_pcmcia_write32(struct ssb_device *dev, u16 offset, u32 value) writew((value & 0x0000FFFF), bus->mmio + offset); writew(((value & 0xFFFF0000) >> 16), bus->mmio + offset + 2); } - mmiowb(); spin_unlock_irqrestore(&bus->bar_lock, flags); } @@ -424,7 +421,6 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer, WARN_ON(1); } unlock: - mmiowb(); spin_unlock_irqrestore(&bus->bar_lock, flags); } #endif /* CONFIG_SSB_BLOCKIO */ diff --git a/drivers/staging/comedi/drivers/mite.c b/drivers/staging/comedi/drivers/mite.c index 61e03ad841230..639ec15869760 100644 --- a/drivers/staging/comedi/drivers/mite.c +++ b/drivers/staging/comedi/drivers/mite.c @@ -371,7 +371,6 @@ static unsigned int mite_get_status(struct mite_channel *mite_chan) writel(CHOR_CLRDONE, mite->mmio + MITE_CHOR(mite_chan->channel)); } - mmiowb(); spin_unlock_irqrestore(&mite->lock, flags); return status; } @@ -451,7 +450,6 @@ void mite_dma_arm(struct mite_channel *mite_chan) mite_chan->done = 0; /* arm */ writel(CHOR_START, mite->mmio + MITE_CHOR(mite_chan->channel)); - mmiowb(); spin_unlock_irqrestore(&mite->lock, flags); } EXPORT_SYMBOL_GPL(mite_dma_arm); @@ -638,7 +636,6 @@ void mite_release_channel(struct mite_channel *mite_chan) CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE, mite->mmio + MITE_CHCR(mite_chan->channel)); mite_chan->ring = NULL; - mmiowb(); } spin_unlock_irqrestore(&mite->lock, flags); } diff --git a/drivers/staging/comedi/drivers/ni_660x.c b/drivers/staging/comedi/drivers/ni_660x.c index 405573e927cfc..4ee9b260eab0e 100644 --- a/drivers/staging/comedi/drivers/ni_660x.c +++ b/drivers/staging/comedi/drivers/ni_660x.c @@ -320,7 +320,6 @@ static inline void ni_660x_set_dma_channel(struct comedi_device *dev, ni_660x_write(dev, chip, devpriv->dma_cfg[chip] | NI660X_DMA_CFG_RESET(mite_channel), NI660X_DMA_CFG); - mmiowb(); } static inline void ni_660x_unset_dma_channel(struct comedi_device *dev, @@ -333,7 +332,6 @@ static inline void ni_660x_unset_dma_channel(struct comedi_device *dev, devpriv->dma_cfg[chip] &= ~NI660X_DMA_CFG_SEL_MASK(mite_channel); devpriv->dma_cfg[chip] |= NI660X_DMA_CFG_SEL_NONE(mite_channel); ni_660x_write(dev, chip, devpriv->dma_cfg[chip], NI660X_DMA_CFG); - mmiowb(); } static int ni_660x_request_mite_channel(struct comedi_device *dev, diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c 
b/drivers/staging/comedi/drivers/ni_mio_common.c index b04dad8c70927..668f2aa16baaa 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -547,7 +547,6 @@ static inline void ni_set_bitfield(struct comedi_device *dev, int reg, reg); break; } - mmiowb(); spin_unlock_irqrestore(&devpriv->soft_reg_copy_lock, flags); } diff --git a/drivers/staging/comedi/drivers/ni_pcidio.c b/drivers/staging/comedi/drivers/ni_pcidio.c index 4bdef87d5dd73..8f3864799c199 100644 --- a/drivers/staging/comedi/drivers/ni_pcidio.c +++ b/drivers/staging/comedi/drivers/ni_pcidio.c @@ -310,7 +310,6 @@ static int ni_pcidio_request_di_mite_channel(struct comedi_device *dev) writeb(primary_DMAChannel_bits(devpriv->di_mite_chan->channel) | secondary_DMAChannel_bits(devpriv->di_mite_chan->channel), dev->mmio + DMA_LINE_CONTROL_GROUP1); - mmiowb(); spin_unlock_irqrestore(&devpriv->mite_channel_lock, flags); return 0; } @@ -327,7 +326,6 @@ static void ni_pcidio_release_di_mite_channel(struct comedi_device *dev) writeb(primary_DMAChannel_bits(0) | secondary_DMAChannel_bits(0), dev->mmio + DMA_LINE_CONTROL_GROUP1); - mmiowb(); } spin_unlock_irqrestore(&devpriv->mite_channel_lock, flags); } diff --git a/drivers/staging/comedi/drivers/ni_tio.c b/drivers/staging/comedi/drivers/ni_tio.c index 048cb35723ad1..c1131a1622c01 100644 --- a/drivers/staging/comedi/drivers/ni_tio.c +++ b/drivers/staging/comedi/drivers/ni_tio.c @@ -234,7 +234,6 @@ static void ni_tio_set_bits_transient(struct ni_gpct *counter, regs[reg] &= ~mask; regs[reg] |= (value & mask); ni_tio_write(counter, regs[reg] | transient, reg); - mmiowb(); spin_unlock_irqrestore(&counter_dev->regs_lock, flags); } } diff --git a/drivers/staging/comedi/drivers/s626.c b/drivers/staging/comedi/drivers/s626.c index f5af6f4069dc2..39049d3c56d7b 100644 --- a/drivers/staging/comedi/drivers/s626.c +++ b/drivers/staging/comedi/drivers/s626.c @@ -108,7 +108,6 @@ static void s626_mc_enable(struct comedi_device *dev, { unsigned int val = (cmd << 16) | cmd; - mmiowb(); writel(val, dev->mmio + reg); } @@ -116,7 +115,6 @@ static void s626_mc_disable(struct comedi_device *dev, unsigned int cmd, unsigned int reg) { writel(cmd << 16, dev->mmio + reg); - mmiowb(); } static bool s626_mc_test(struct comedi_device *dev, diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c index ef89534dd760c..e5d3ebab6dae6 100644 --- a/drivers/tty/serial/men_z135_uart.c +++ b/drivers/tty/serial/men_z135_uart.c @@ -353,7 +353,6 @@ static void men_z135_handle_tx(struct men_z135_port *uart) memcpy_toio(port->membase + MEN_Z135_TX_RAM, &xmit->buf[xmit->tail], n); xmit->tail = (xmit->tail + n) & (UART_XMIT_SIZE - 1); - mmiowb(); iowrite32(n & 0x3ff, port->membase + MEN_Z135_TX_CTRL); diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index 1b4008d022bfd..d22ccb32aa9b7 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -248,7 +248,6 @@ static void serial_txx9_initialize(struct uart_port *port) sio_out(up, TXX9_SIFCR, TXX9_SIFCR_SWRST); /* TX4925 BUG WORKAROUND. Accessing SIOC register * immediately after soft reset causes bus error. 
*/ - mmiowb(); udelay(1); while ((sio_in(up, TXX9_SIFCR) & TXX9_SIFCR_SWRST) && --tmout) udelay(1); diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index c9cfb100ecdca..cac991173ac04 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -533,8 +533,6 @@ static int xdbc_handle_external_reset(void) xdbc_mem_init(); - mmiowb(); - ret = xdbc_start(); if (ret < 0) goto reset_out; @@ -587,8 +585,6 @@ static int __init xdbc_early_setup(void) xdbc_mem_init(); - mmiowb(); - ret = xdbc_start(); if (ret < 0) { writel(0, &xdbc.xdbc_reg->control); diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index d932cc31711e8..52e32644a4b2f 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -421,8 +421,6 @@ static int xhci_dbc_mem_init(struct xhci_hcd *xhci, gfp_t flags) string_length = xhci_dbc_populate_strings(dbc->string); xhci_dbc_init_contexts(xhci, string_length); - mmiowb(); - xhci_dbc_eps_init(xhci); dbc->state = DS_INITIALIZED; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index f6165d304b4dd..48841e5dab90f 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1338,7 +1338,6 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) } /* Let SB update */ - mmiowb(); return rc; } @@ -1374,7 +1373,6 @@ static inline void qed_sb_ack(struct qed_sb_info *sb_info, /* Both segments (interrupts & acks) are written to same place address; * Need to guarantee all commands will be received (in-order) by HW. */ - mmiowb(); barrier(); } diff --git a/sound/soc/txx9/txx9aclc-ac97.c b/sound/soc/txx9/txx9aclc-ac97.c index 1cfca698ae4b4..b0fa285c7ba29 100644 --- a/sound/soc/txx9/txx9aclc-ac97.c +++ b/sound/soc/txx9/txx9aclc-ac97.c @@ -102,7 +102,6 @@ static void txx9aclc_ac97_cold_reset(struct snd_ac97 *ac97) u32 ready = ACINT_CODECRDY(ac97->num) | ACINT_REGACCRDY; __raw_writel(ACCTL_ENLINK, base + ACCTLDIS); - mmiowb(); udelay(1); __raw_writel(ACCTL_ENLINK, base + ACCTLEN); /* wait for primary codec ready status */ -- GitLab From 161861a4c8279f136e5419deb95c58176af83e48 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:18:02 +0000 Subject: [PATCH 0775/1861] scsi/qla1280: Remove stale comment about mmiowb() All mmiowb() invocations have been removed, so there's no need to keep banging on about it. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- drivers/scsi/qla1280.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 93acbc5094f05..327eff67a1eee 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -3363,16 +3363,6 @@ qla1280_isp_cmd(struct scsi_qla_host *ha) /* * Update request index to mailbox4 (Request Queue In). - * The mmiowb() ensures that this write is ordered with writes by other - * CPUs. Without the mmiowb(), it is possible for the following: - * CPUA posts write of index 5 to mailbox4 - * CPUA releases host lock - * CPUB acquires host lock - * CPUB posts write of index 6 to mailbox4 - * On PCI bus, order reverses and write of 6 posts, then index 5, - * causing chip to issue full queue of stale commands - * The mmiowb() prevents future writes from crossing the barrier. - * See Documentation/driver-api/device-io.rst for more information. 
*/ WRT_REG_WORD(®->mailbox4, ha->req_ring_index); -- GitLab From 1b8546d7e23872c77d0ef6a4c311ac7037380c24 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:19:22 +0000 Subject: [PATCH 0776/1861] i40iw: Redefine i40iw_mmiowb() to do nothing mmiowb() is now implicit in spin_unlock(), so there's no reason to call it from driver code. Redefine i40iw_mmiowb() to do nothing instead. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- drivers/infiniband/hw/i40iw/i40iw_osdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h index f27be3e7830bb..d474aad62a81d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h +++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h @@ -211,7 +211,7 @@ enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev, struct i40iw_sc_vsi; void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi); void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi); -#define i40iw_mmiowb() mmiowb() +#define i40iw_mmiowb() do { } while (0) void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value); u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg); #endif /* _I40IW_OSDEP_H_ */ -- GitLab From 96670b2fd0254a2c51cc2bb4b4f3057a6b6f5d29 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:21:02 +0000 Subject: [PATCH 0777/1861] net/ethernet/silan/sc92031: Remove stale comment about mmiowb() mmiowb() is no more. It has ceased to be. It is an ex-barrier. So remove all references to it from comments. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- drivers/net/ethernet/silan/sc92031.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index db5dc8ce0aff8..02b3962b0e63e 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -251,7 +251,6 @@ enum PMConfigBits { * use of mdelay() at _sc92031_reset. * Functions prefixed with _sc92031_ must be called with the lock held; * functions prefixed with sc92031_ must be called without the lock held. - * Use mmiowb() before unlocking if the hardware was written to. */ /* Locking rules for the interrupt: -- GitLab From 01e3b958efe85a26d9b1b77be3a0a1491bb4cb36 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 17:25:29 +0000 Subject: [PATCH 0778/1861] arch: Remove dummy mmiowb() definitions from arch code Now that no driver code is using mmiowb() directly, remove the dummy definitions remaining in architectures that don't make use of asm-generic/io.h, as well as the definition in asm-generic/io.h itself. 
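For reference, the driver-side change repeated throughout this series is mechanical. A minimal before/after sketch (hypothetical driver; the lock, register offset and value are illustrative):

    /* Before: a posted MMIO write had to be ordered against the unlock
     * explicitly, so that a write issued by the next lock holder could
     * not overtake it on the bus.
     */
    spin_lock_irqsave(&dev->lock, flags);
    writel(val, dev->regs + REG_DOORBELL);
    mmiowb();
    spin_unlock_irqrestore(&dev->lock, flags);

    /* After: spin_unlock() now provides that ordering implicitly on the
     * architectures that need it, so the explicit call is simply dropped.
     */
    spin_lock_irqsave(&dev->lock, flags);
    writel(val, dev->regs + REG_DOORBELL);
    spin_unlock_irqrestore(&dev->lock, flags);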
Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/alpha/include/asm/io.h | 2 -- arch/hexagon/include/asm/io.h | 2 -- arch/parisc/include/asm/io.h | 2 -- arch/powerpc/include/asm/mmiowb.h | 2 -- arch/sparc/include/asm/io_64.h | 2 -- include/asm-generic/io.h | 4 ---- 6 files changed, 14 deletions(-) diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 4c533fc94d62f..ccf9d65166bb6 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -513,8 +513,6 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define writel_relaxed(b, addr) __raw_writel(b, addr) #define writeq_relaxed(b, addr) __raw_writeq(b, addr) -#define mmiowb() - /* * String version of IO memory access ops: */ diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index e17262ad125e7..3d0ae09c2b8e1 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -184,8 +184,6 @@ static inline void writel(u32 data, volatile void __iomem *addr) #define writew_relaxed __raw_writew #define writel_relaxed __raw_writel -#define mmiowb() - /* * Need an mtype somewhere in here, for cache type deals? * This is probably too long for an inline. diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h index 30a8315d5c075..93d37010b375c 100644 --- a/arch/parisc/include/asm/io.h +++ b/arch/parisc/include/asm/io.h @@ -229,8 +229,6 @@ static inline void writeq(unsigned long long q, volatile void __iomem *addr) #define writel_relaxed(l, addr) writel(l, addr) #define writeq_relaxed(q, addr) writeq(q, addr) -#define mmiowb() do { } while (0) - void memset_io(volatile void __iomem *addr, unsigned char val, int count); void memcpy_fromio(void *dst, const volatile void __iomem *src, int count); void memcpy_toio(volatile void __iomem *dst, const void *src, int count); diff --git a/arch/powerpc/include/asm/mmiowb.h b/arch/powerpc/include/asm/mmiowb.h index b10180613507c..74a00127eb204 100644 --- a/arch/powerpc/include/asm/mmiowb.h +++ b/arch/powerpc/include/asm/mmiowb.h @@ -11,8 +11,6 @@ #define arch_mmiowb_state() (&local_paca->mmiowb_state) #define mmiowb() mb() -#else -#define mmiowb() do { } while (0) #endif /* CONFIG_MMIOWB */ #include diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index b162c23ae8c23..688911051b446 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -396,8 +396,6 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, } } -#define mmiowb() - #ifdef __KERNEL__ /* On sparc64 we have the whole physical IO address space accessible diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index bc490a7466021..8f3bf95a36d19 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -22,10 +22,6 @@ #include #include -#ifndef mmiowb -#define mmiowb() do {} while (0) -#endif - #ifndef __io_br #define __io_br() barrier() #endif -- GitLab From a58d7525b8014115d57fd30186a84f6d30783f2c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 12 Mar 2019 10:51:40 +0100 Subject: [PATCH 0779/1861] cfg80211: add ratelimited variants of err and warn wiphy_{err,warn}_ratelimited will be used by rt2x00 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bb307a11ee636..13bfeb712d369 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7183,6 +7183,11 @@ void 
cfg80211_pmsr_complete(struct wireless_dev *wdev, #define wiphy_info(wiphy, format, args...) \ dev_info(&(wiphy)->dev, format, ##args) +#define wiphy_err_ratelimited(wiphy, format, args...) \ + dev_err_ratelimited(&(wiphy)->dev, format, ##args) +#define wiphy_warn_ratelimited(wiphy, format, args...) \ + dev_warn_ratelimited(&(wiphy)->dev, format, ##args) + #define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -- GitLab From 45fcef8b727b6f171bc5443e8153181a367d7a15 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Mar 2019 08:56:22 +0100 Subject: [PATCH 0780/1861] mac80211_hwsim: calculate if_combination.max_interfaces If we just set this to 2048, and have multiple limits you can select from, the total number might run over and cause a warning in cfg80211. This doesn't make sense, so we just calculate the total max_interfaces now. Reported-by: syzbot+8f91bd563bbff230d0ee@syzkaller.appspotmail.com Fixes: 99e3a44bac37 ("mac80211_hwsim: allow setting iftype support") Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0838af04d681a..524eb58059957 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2644,7 +2644,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, enum nl80211_band band; const struct ieee80211_ops *ops = &mac80211_hwsim_ops; struct net *net; - int idx; + int idx, i; int n_limits = 0; if (WARN_ON(param->channels > 1 && !param->use_chanctx)) @@ -2768,12 +2768,23 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, goto failed_hw; } + data->if_combination.max_interfaces = 0; + for (i = 0; i < n_limits; i++) + data->if_combination.max_interfaces += + data->if_limits[i].max; + data->if_combination.n_limits = n_limits; - data->if_combination.max_interfaces = 2048; data->if_combination.limits = data->if_limits; - hw->wiphy->iface_combinations = &data->if_combination; - hw->wiphy->n_iface_combinations = 1; + /* + * If we actually were asked to support combinations, + * advertise them - if there's only a single thing like + * only IBSS then don't advertise it as combinations. + */ + if (data->if_combination.max_interfaces > 1) { + hw->wiphy->iface_combinations = &data->if_combination; + hw->wiphy->n_iface_combinations = 1; + } if (param->ciphers) { memcpy(data->ciphers, param->ciphers, -- GitLab From 2b4a66980217332d91ab1785e1750857d6d52bc8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 18 Mar 2019 12:00:58 +0100 Subject: [PATCH 0781/1861] mac80211: make ieee80211_schedule_txq schedule empty TXQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently there is no way for the driver to signal to mac80211 that it should schedule a TXQ even if there are no packets on the mac80211 part of that queue. This is problematic if the driver has an internal retry queue to deal with software A-MPDU retry. This patch changes the behavior of ieee80211_schedule_txq to always schedule the queue, as its only user (ath9k) seems to expect such behavior already: it calls this function on tx status and on powersave wakeup whenever its internal retry queue is not empty. Also add an extra argument to ieee80211_return_txq to get the same behavior. 
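A sketch of the driver-side pattern the new argument enables (hypothetical driver; the retry_q field is illustrative, not a real mac80211 member):

    /* Frames are still buffered in a driver-private retry queue, so the
     * TXQ must stay scheduled even though mac80211 itself has nothing
     * queued for it.
     */
    bool force = !skb_queue_empty(&drv->retry_q);

    ieee80211_return_txq(hw, txq, force);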
This fixes an issue on ath9k where tx queues with packets to retry (and no new packets in mac80211) would not get serviced. Fixes: 89cea7493a346 ("ath9k: Switch to mac80211 TXQ scheduling and airtime APIs") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- drivers/net/wireless/ath/ath9k/xmit.c | 5 ++++- include/net/mac80211.h | 24 ++++++++++++++++++++---- net/mac80211/tx.c | 10 ++++++---- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index a20ea270d519b..1acc622d21833 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2728,7 +2728,7 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb) num_msdus++; num_bytes += ret; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ieee80211_txq_schedule_end(hw, txq->ac); record->num_msdus = cpu_to_le16(num_msdus); diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index b73c23d4ce86d..41e89db244d20 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4089,7 +4089,7 @@ static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac) if (ret < 0) break; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); if (ret == -EBUSY) break; @@ -4374,7 +4374,7 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, if (ret < 0) break; } - ieee80211_return_txq(hw, txq); + ieee80211_return_txq(hw, txq, false); ath10k_htt_tx_txq_update(hw, txq); out: ieee80211_txq_schedule_end(hw, ac); diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 773d428ff1b03..b17e1ca40995e 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1938,12 +1938,15 @@ void ath_txq_schedule(struct ath_softc *sc, struct ath_txq *txq) goto out; while ((queue = ieee80211_next_txq(hw, txq->mac80211_qnum))) { + bool force; + tid = (struct ath_atx_tid *)queue->drv_priv; ret = ath_tx_sched_aggr(sc, txq, tid); ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret); - ieee80211_return_txq(hw, queue); + force = !skb_queue_empty(&tid->retry_q); + ieee80211_return_txq(hw, queue, force); } out: diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 616998252dc7e..112dc18c658f1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6253,26 +6253,42 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) { } +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, bool force); + /** * ieee80211_schedule_txq - schedule a TXQ for transmission * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface * - * Schedules a TXQ for transmission if it is not already scheduled. + * Schedules a TXQ for transmission if it is not already scheduled, + * even if mac80211 does not have any packets buffered. + * + * The driver may call this function if it has buffered packets for + * this TXQ internally. 
*/ -void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq); +static inline void +ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +{ + __ieee80211_schedule_txq(hw, txq, true); +} /** * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq() * * @hw: pointer as obtained from ieee80211_alloc_hw() * @txq: pointer obtained from station or virtual interface + * @force: schedule txq even if mac80211 does not have any buffered packets. + * + * The driver may set force=true if it has buffered packets for this TXQ + * internally. */ static inline void -ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq) +ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq, + bool force) { - ieee80211_schedule_txq(hw, txq); + __ieee80211_schedule_txq(hw, txq, force); } /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 134a3da147c6d..2e816dd67be72 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3688,8 +3688,9 @@ out: } EXPORT_SYMBOL(ieee80211_next_txq); -void ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +void __ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool force) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); @@ -3697,7 +3698,8 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_lock_bh(&local->active_txq_lock[txq->ac]); if (list_empty(&txqi->schedule_order) && - (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + (force || !skb_queue_empty(&txqi->frags) || + txqi->tin.backlog_packets)) { /* If airtime accounting is active, always enqueue STAs at the * head of the list to ensure that they only get moved to the * back by the airtime DRR scheduler once they have a negative @@ -3717,7 +3719,7 @@ void ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_unlock_bh(&local->active_txq_lock[txq->ac]); } -EXPORT_SYMBOL(ieee80211_schedule_txq); +EXPORT_SYMBOL(__ieee80211_schedule_txq); bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) -- GitLab From 7100e8704b61247649c50551b965e71d168df30b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 29 Mar 2019 17:42:57 +1000 Subject: [PATCH 0782/1861] powerpc/64s/radix: Fix radix segment exception handling Commit 48e7b76957 ("powerpc/64s/hash: Convert SLB miss handlers to C") broke the radix-mode segment exception handler. In radix mode, this is exception is not an SLB miss, rather it signals that the EA is outside the range translated by any page table. The commit lost the radix feature alternate code patch, which can cause faults to some EAs to kernel BUG at arch/powerpc/mm/slb.c:639! The original radix code would send faults to slb_miss_large_addr, which would end up faulting due to slb_addr_limit being 0. This patch sends radix directly to do_bad_slb_fault, which is a bit clearer. 
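For context, the fix leans on powerpc's boot-time feature patching; schematically:

    BEGIN_MMU_FTR_SECTION
            /* kept when MMU_FTR_TYPE_RADIX is clear: hash MMU,
             * so actually handle the SLB fault */
    MMU_FTR_SECTION_ELSE
            /* kept when radix is active: there is no SLB,
             * the access is simply out of range, report -EFAULT */
    ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)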
Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C") Cc: stable@vger.kernel.org # v4.20+ Reported-by: Anton Blanchard Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a5b8fbae56a03..9481a117e2425 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -656,11 +656,17 @@ EXC_COMMON_BEGIN(data_access_slb_common) ld r4,PACA_EXSLB+EX_DAR(r13) std r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + /* HPT case, do SLB fault */ bl do_slb_fault cmpdi r3,0 bne- 1f b fast_exception_return 1: /* Error case */ +MMU_FTR_SECTION_ELSE + /* Radix case, access is outside page table range */ + li r3,-EFAULT +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) @@ -705,11 +711,17 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB) ld r4,_NIP(r1) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + /* HPT case, do SLB fault */ bl do_slb_fault cmpdi r3,0 bne- 1f b fast_exception_return 1: /* Error case */ +MMU_FTR_SECTION_ELSE + /* Radix case, access is outside page table range */ + li r3,-EFAULT +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) -- GitLab From cae30527901d9590db0e12ace994c1d58bea87fd Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 8 Apr 2019 15:58:11 +0800 Subject: [PATCH 0783/1861] ALSA: hda - Add two more machines to the power_save_blacklist Recently we set CONFIG_SND_HDA_POWER_SAVE_DEFAULT to 1 when configuring the kernel, then two machines were reported to have noise after installing the new kernel. Put them in the blacklist, the noise disappears. 
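A condensed sketch of how the blacklist takes effect at probe time (paraphrasing the logic in hda_intel.c, not a verbatim quote):

    const struct snd_pci_quirk *q;
    int val = power_save;   /* module param, defaults to
                             * CONFIG_SND_HDA_POWER_SAVE_DEFAULT */

    q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
    if (q && val)
            val = 0;        /* machine matched by PCI SSID: keep
                             * runtime power saving off */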
https://bugs.launchpad.net/bugs/1821663 Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ece256a3b48f3..2ec91085fa3e7 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2142,6 +2142,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x8086, 0x2040, "Intel DZ77BH-55K", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=199607 */ SND_PCI_QUIRK(0x8086, 0x2057, "Intel NUC5i7RYB", 0), + /* https://bugs.launchpad.net/bugs/1821663 */ + SND_PCI_QUIRK(0x8086, 0x2064, "Intel SDP 8086:2064", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1520902 */ SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ @@ -2150,6 +2152,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), + /* https://bugs.launchpad.net/bugs/1821663 */ + SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), {} }; #endif /* CONFIG_PM */ -- GitLab From 6a8aae68c87349dbbcd46eac380bc43cdb98a13b Mon Sep 17 00:00:00 2001 From: Longpeng Date: Sat, 9 Mar 2019 15:17:40 +0800 Subject: [PATCH 0784/1861] virtio_pci: fix a NULL pointer reference in vp_del_vqs If the msix_affinity_masks is alloced failed, then we'll try to free some resources in vp_free_vectors() that may access it directly. We met the following stack in our production: [ 29.296767] BUG: unable to handle kernel NULL pointer dereference at (null) [ 29.311151] IP: [] vp_free_vectors+0x6a/0x150 [virtio_pci] [ 29.324787] PGD 0 [ 29.333224] Oops: 0000 [#1] SMP [...] [ 29.425175] RIP: 0010:[] [] vp_free_vectors+0x6a/0x150 [virtio_pci] [ 29.441405] RSP: 0018:ffff9a55c2dcfa10 EFLAGS: 00010206 [ 29.453491] RAX: 0000000000000000 RBX: ffff9a55c322c400 RCX: 0000000000000000 [ 29.467488] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9a55c322c400 [ 29.481461] RBP: ffff9a55c2dcfa20 R08: 0000000000000000 R09: ffffc1b6806ff020 [ 29.495427] R10: 0000000000000e95 R11: 0000000000aaaaaa R12: 0000000000000000 [ 29.509414] R13: 0000000000010000 R14: ffff9a55bd2d9e98 R15: ffff9a55c322c400 [ 29.523407] FS: 00007fdcba69f8c0(0000) GS:ffff9a55c2840000(0000) knlGS:0000000000000000 [ 29.538472] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.551621] CR2: 0000000000000000 CR3: 000000003ce52000 CR4: 00000000003607a0 [ 29.565886] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 29.580055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 29.594122] Call Trace: [ 29.603446] [] vp_request_msix_vectors+0xe2/0x260 [virtio_pci] [ 29.618017] [] vp_try_to_find_vqs+0x95/0x3b0 [virtio_pci] [ 29.632152] [] vp_find_vqs+0x37/0xb0 [virtio_pci] [ 29.645582] [] init_vq+0x153/0x260 [virtio_blk] [ 29.658831] [] virtblk_probe+0xe8/0x87f [virtio_blk] [...] Cc: Gonglei Signed-off-by: Longpeng Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Gonglei --- drivers/virtio/virtio_pci_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index d0584c040c60f..7a0398bb84f77 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -255,9 +255,11 @@ void vp_del_vqs(struct virtio_device *vdev) for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + if (vp_dev->msix_affinity_masks) { + for (i = 0; i < vp_dev->msix_vectors; i++) + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ -- GitLab From 5e9642a2e14cd0f5be14186681f280979bb3f3cd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 27 Mar 2019 11:37:26 -0400 Subject: [PATCH 0785/1861] MAiNTAINERS: add Paolo, Stefan for virtio blk/scsi Jason doesn't really have the time to review blk/scsi patches. Paolo and Setfan agreed to help out. Thanks guys! Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41e..09c546312bc88 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16509,7 +16509,7 @@ F: drivers/char/virtio_console.c F: include/linux/virtio_console.h F: include/uapi/linux/virtio_console.h -VIRTIO CORE, NET AND BLOCK DRIVERS +VIRTIO CORE AND NET DRIVERS M: "Michael S. Tsirkin" M: Jason Wang L: virtualization@lists.linux-foundation.org @@ -16524,6 +16524,19 @@ F: include/uapi/linux/virtio_*.h F: drivers/crypto/virtio/ F: mm/balloon_compaction.c +VIRTIO BLOCK AND SCSI DRIVERS +M: "Michael S. Tsirkin" +M: Jason Wang +R: Paolo Bonzini +R: Stefan Hajnoczi +L: virtualization@lists.linux-foundation.org +S: Maintained +F: drivers/block/virtio_blk.c +F: drivers/scsi/virtio_scsi.c +F: include/uapi/linux/virtio_blk.h +F: include/uapi/linux/virtio_scsi.h +F: drivers/vhost/scsi.c + VIRTIO CRYPTO DRIVER M: Gonglei L: virtualization@lists.linux-foundation.org -- GitLab From 1200e07f3ad4b9d976cf2fff3a0c3d9a1faecb3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 8 Apr 2019 19:02:38 +0800 Subject: [PATCH 0786/1861] block: don't use for-inside-for in bio_for_each_segment_all Commit 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec") changes bio_for_each_segment_all() to use for-inside-for. This way breaks all bio_for_each_segment_all() call with error out branch via 'break', since now 'break' can only break from the inner loop. Fixes this issue by implementing bio_for_each_segment_all() via single 'for' loop, and now the logic is very similar with normal bvec iterator. 
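The pitfall is easy to see in miniature; a hypothetical sketch (not kernel code) of why hiding two nested loops behind one macro swallows 'break':

    #define for_each_pair(i, j)             \
            for (i = 0; i < N; i++)         \
                    for (j = 0; j < M; j++)

    for_each_pair(i, j) {
            if (ret < 0)
                    break;  /* exits only the inner loop; the outer
                             * loop keeps iterating */
    }

Rewriting the iterator as a single for loop restores the expected behavior: 'break' ends the whole walk.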
Cc: Qu Wenruo Cc: linux-btrfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: Omar Sandoval Reviewed-by: Johannes Thumshirn Reported-and-Tested-by: Qu Wenruo Fixes: 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 20 ++++++++++++-------- include/linux/bvec.h | 14 ++++++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index bb6090aa165d3..e584673c18814 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -120,19 +120,23 @@ static inline bool bio_full(struct bio *bio) return bio->bi_vcnt >= bio->bi_max_vecs; } -#define mp_bvec_for_each_segment(bv, bvl, i, iter_all) \ - for (bv = bvec_init_iter_all(&iter_all); \ - (iter_all.done < (bvl)->bv_len) && \ - (mp_bvec_next_segment((bvl), &iter_all), 1); \ - iter_all.done += bv->bv_len, i += 1) +static inline bool bio_next_segment(const struct bio *bio, + struct bvec_iter_all *iter) +{ + if (iter->idx >= bio->bi_vcnt) + return false; + + bvec_advance(&bio->bi_io_vec[iter->idx], iter); + return true; +} /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ -#define bio_for_each_segment_all(bvl, bio, i, iter_all) \ - for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++) \ - mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all) +#define bio_for_each_segment_all(bvl, bio, i, iter) \ + for (i = 0, bvl = bvec_init_iter_all(&iter); \ + bio_next_segment((bio), &iter); i++) static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, unsigned bytes) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f6275c4da13a7..3bc91879e1e2b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -145,18 +145,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) { - iter_all->bv.bv_page = NULL; iter_all->done = 0; + iter_all->idx = 0; return &iter_all->bv; } -static inline void mp_bvec_next_segment(const struct bio_vec *bvec, - struct bvec_iter_all *iter_all) +static inline void bvec_advance(const struct bio_vec *bvec, + struct bvec_iter_all *iter_all) { struct bio_vec *bv = &iter_all->bv; - if (bv->bv_page) { + if (iter_all->done) { bv->bv_page = nth_page(bv->bv_page, 1); bv->bv_offset = 0; } else { @@ -165,6 +165,12 @@ static inline void mp_bvec_next_segment(const struct bio_vec *bvec, } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, bvec->bv_len - iter_all->done); + iter_all->done += bv->bv_len; + + if (iter_all->done == bvec->bv_len) { + iter_all->idx++; + iter_all->done = 0; + } } /* -- GitLab From bd13b2b874eceb4677cd26eebdc5f45cc52fa400 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 1 Feb 2019 09:36:59 -0500 Subject: [PATCH 0787/1861] drm/amd/display: Fix negative cursor pos programming (v2) [Why] If the cursor pos passed from DM is less than the plane_state->dst_rect top left corner then the unsigned cursor pos wraps around to a large positive number since cursor pos is a u32. There was an attempt to guard against this in hubp1_cursor_set_position by checking the src_x_offset and src_y_offset and offseting the cursor hotspot within hubp1_cursor_set_position. However, the cursor position itself is still being programmed incorrectly as a large value. 
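(A worked example with made-up numbers: for pos->x = 10 against a dst_rect left edge of 20, the u32 result of 10 - 20 is 0xfffffff6, i.e. 4294967286, which is exactly the kind of large value described above.)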
This manifests itself visually as the cursor disappearing or containing strange artifacts near the middle of the screen on raven. [How] Don't subtract the destination rect top left corner from the pos but add it to the hotspot instead. This happens before the pos gets passed into hubp1_cursor_set_position. This achieves the same result but avoids the subtraction wrap around. With this fix the original cursor programming logic can be used again. v2: add hunk that got dropped accidently when this patch was originally committed. (Alex) Fixes: 0921c41e1902831 ("drm/amd/display: Fix negative cursor pos programming") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Charlene Liu Acked-by: Leo Li Acked-by: Murton Liu Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 683829466a44c..0ba68d41b9c37 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1150,28 +1150,9 @@ void hubp1_cursor_set_position( REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); - //account for cases where we see negative offset relative to overlay plane - if (src_x_offset < 0 && src_y_offset < 0) { - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, 0, - CURSOR_Y_POSITION, 0); - x_hotspot -= src_x_offset; - y_hotspot -= src_y_offset; - } else if (src_x_offset < 0) { - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, 0, - CURSOR_Y_POSITION, pos->y); - x_hotspot -= src_x_offset; - } else if (src_y_offset < 0) { - REG_SET_2(CURSOR_POSITION, 0, + REG_SET_2(CURSOR_POSITION, 0, CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, 0); - y_hotspot -= src_y_offset; - } else { - REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); - } + CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, x_hotspot, -- GitLab From e1b7058ece718c0350ad2e5bfbdab17885bd9f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 5 Apr 2019 17:13:49 +0300 Subject: [PATCH 0788/1861] drm/i915: Fix pipe_bpp readout for BXT/GLK DSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only bpc information in pipe registers for BXT/GLK DSI is the PIPEMISC dither bpc. Let's try to use that to read out pipe_bpp on these platforms. However, I'm not sure if this will be correctly populated by the GOP since bspec suggests it's only needed if dithering is actually enabled. If not I guess we'll have to go one step further and extract pipe_bpp from the DSI pixel format when dithering is disabled. 
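(For the arithmetic below: pipe_bpp counts bits per pixel across all three color channels, i.e. 3 x bpc, which is why the helper returns 18/24/30/36 for the 6/8/10/12 bpc dither settings.)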
Cc: Hans de Goede Fixes: ca0b04db14a5 ("drm/i915/dsi: Fix pipe_bpp for handling for 6 bpc pixel-formats") References: https://bugs.freedesktop.org/show_bug.cgi?id=109516 Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190405141349.11950-1-ville.syrjala@linux.intel.com (cherry picked from commit 499653501baf27d26e73cb5ce744869df3400509) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/vlv_dsi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/vlv_dsi.c index 6403728fe7784..31c93c3ccd00f 100644 --- a/drivers/gpu/drm/i915/vlv_dsi.c +++ b/drivers/gpu/drm/i915/vlv_dsi.c @@ -256,6 +256,28 @@ static void band_gap_reset(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->sb_lock); } +static int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 tmp; + + tmp = I915_READ(PIPEMISC(crtc->pipe)); + + switch (tmp & PIPEMISC_DITHER_BPC_MASK) { + case PIPEMISC_DITHER_6_BPC: + return 18; + case PIPEMISC_DITHER_8_BPC: + return 24; + case PIPEMISC_DITHER_10_BPC: + return 30; + case PIPEMISC_DITHER_12_BPC: + return 36; + default: + MISSING_CASE(tmp); + return 0; + } +} + static int intel_dsi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -1071,6 +1093,8 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, bpp = mipi_dsi_pixel_format_to_bpp( pixel_format_from_register_bits(fmt)); + pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc); + /* Enable Frame time stamo based scanline reporting */ adjusted_mode->private_flags |= I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; -- GitLab From 55c0c4c793b538fb438bcc72481b9dc2f79fe5a9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 8 Apr 2019 16:04:38 +0300 Subject: [PATCH 0789/1861] ARC: memset: fix build with L1_CACHE_SHIFT != 6 In case of 'L1_CACHE_SHIFT != 6' we define dummy assembly macroses PREALLOC_INSTR and PREFETCHW_INSTR without arguments. However we pass arguments to them in code which cause build errors. Fix that. Signed-off-by: Eugeniy Paltsev Cc: [5.0] Signed-off-by: Vineet Gupta --- arch/arc/lib/memset-archs.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index f230bb7092fdb..b3373f5c88e0b 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -30,10 +30,10 @@ #else -.macro PREALLOC_INSTR +.macro PREALLOC_INSTR reg, off .endm -.macro PREFETCHW_INSTR +.macro PREFETCHW_INSTR reg, off .endm #endif -- GitLab From 5a3ae7b314a2259b1188b22b392f5eba01e443ee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 7 Apr 2019 21:06:16 +0200 Subject: [PATCH 0790/1861] arm64/ftrace: fix inadvertent BUG() in trampoline check The ftrace trampoline code (which deals with modules loaded out of BL range of the core kernel) uses plt_entries_equal() to check whether the per-module trampoline equals a zero buffer, to decide whether the trampoline has already been initialized. This triggers a BUG() in the opcode manipulation code, since we end up checking the ADRP offset of a 0x0 opcode, which is not an ADRP instruction. So instead, add a helper to check whether a PLT is initialized, and call that from the frace code. 
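For context: a not-yet-initialized trampoline is all zeroes, and 0x00000000 is not a valid ADRP encoding, so letting plt_entries_equal() extract an ADRP offset from the zero buffer is what tripped the BUG(). The plt_entry_is_initialized() helper added below avoids decoding opcodes entirely and just tests whether any of the entry's three instruction slots is non-zero.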
Cc: # v5.0 Fixes: bdb85cd1d206 ("arm64/module: switch to ADRP/ADD sequences for PLT entries") Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 5 +++++ arch/arm64/kernel/ftrace.c | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 905e1bb0e7bd0..cd9f4e9d04d3b 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -73,4 +73,9 @@ static inline bool is_forbidden_offset_for_adrp(void *place) struct plt_entry get_plt_entry(u64 dst, void *pc); bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b); +static inline bool plt_entry_is_initialized(const struct plt_entry *e) +{ + return e->adrp || e->add || e->br; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8e4431a8821f5..07b2981201820 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -107,8 +107,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); if (!plt_entries_equal(mod->arch.ftrace_trampoline, &trampoline)) { - if (!plt_entries_equal(mod->arch.ftrace_trampoline, - &(struct plt_entry){})) { + if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } -- GitLab From ea7a5c706fa49273cf6d1d9def053ecb50db2076 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 16:52:54 +0300 Subject: [PATCH 0791/1861] RDMA/vmw_pvrdma: Fix memory leak on pvrdma_pci_remove Make sure to free the DSR on pvrdma_pci_remove() to avoid the memory leak. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Kamal Heib Acked-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6d8b3e0de57a8..ec41400fec0c0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1131,6 +1131,8 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); pvrdma_page_dir_cleanup(dev, &dev->async_pdir); pvrdma_free_slots(dev); + dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr, + dev->dsrbase); iounmap(dev->regs); kfree(dev->sgid_tbl); -- GitLab From 4772e03d239484f3461e33c79d721c8ea03f7416 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sun, 7 Apr 2019 13:23:38 +0800 Subject: [PATCH 0792/1861] RDMA/hns: Fix bug that caused srq creation to fail Due to the incorrect use of the seg and obj information, the position of the mtt is calculated incorrectly, and the free space of the page is not enough to store the entire mtt, resulting in access to the next page. This patch fixes this problem. Unable to handle kernel paging request at virtual address ffff00006e3cd000 ... 
Call trace: hns_roce_write_mtt+0x154/0x2f0 [hns_roce] hns_roce_buf_write_mtt+0xa8/0xd8 [hns_roce] hns_roce_create_srq+0x74c/0x808 [hns_roce] ib_create_srq+0x28/0xc8 Fixes: 0203b14c4f32 ("RDMA/hns: Unify the calculation for hem index in hip08") Signed-off-by: chenglang Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 6 ++++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f1fec56f3ff49..8e29dbb5b5fbc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -792,6 +792,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk; dma_offset = offset = idx_offset * table->obj_size; } else { + u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */ + hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); /* mtt mhop */ i = mhop.l0_idx; @@ -803,8 +805,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, hem_idx = i; hem = table->hem[hem_idx]; - dma_offset = offset = (obj & (table->num_obj - 1)) * - table->obj_size % mhop.bt_chunk_size; + dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size % + mhop.bt_chunk_size; if (mhop.hop_num == 2) dma_offset = offset = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b09f1cde2ff54..08be0e4eabcd7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -746,7 +746,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table; dma_addr_t dma_handle; __le64 *mtts; - u32 s = start_index * sizeof(u64); u32 bt_page_size; u32 i; @@ -780,7 +779,8 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, return -EINVAL; mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, + mtt->first_seg + + start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, &dma_handle); if (!mtts) return -ENOMEM; -- GitLab From 2170a0d53bee1a6c1a4ebd042f99d85aafc6c0ea Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 11 Mar 2019 12:47:14 -0700 Subject: [PATCH 0793/1861] tools/testing/nvdimm: Retain security state after overwrite Overwrite retains the security state after completion of operation. Fix nfit_test to reflect this so that the kernel can test the behavior it is more likely to see in practice. 
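For illustration, a minimal standalone sketch of the save-and-restore idea follows; the state names and values are simplified stand-ins, not the nfit_test definitions.

#include <stdio.h>

enum sec_state { SEC_DISABLED, SEC_ENABLED, SEC_OVERWRITE };	/* illustrative values */

struct sec { enum sec_state state, old_state; };

static void overwrite_start(struct sec *s)
{
	s->old_state = s->state;	/* remember what to restore later */
	s->state = SEC_OVERWRITE;
}

static void overwrite_done(struct sec *s)
{
	s->state = s->old_state;	/* retain, rather than clear, the prior state */
	s->old_state = SEC_DISABLED;
}

int main(void)
{
	struct sec s = { .state = SEC_ENABLED };

	overwrite_start(&s);
	overwrite_done(&s);
	printf("%d\n", s.state);	/* prints 1: still enabled after overwrite */
	return 0;
}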
Fixes: 926f74802cb1 ("tools/testing/nvdimm: Add overwrite support for nfit_test") Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cad719876ef45..85ffdcfa596b5 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -146,6 +146,7 @@ static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; struct nfit_test_sec { u8 state; u8 ext_state; + u8 old_state; u8 passphrase[32]; u8 master_passphrase[32]; u64 overwrite_end_time; @@ -1100,7 +1101,7 @@ static int nd_intel_test_cmd_overwrite(struct nfit_test *t, return 0; } - memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->old_state = sec->state; sec->state = ND_INTEL_SEC_STATE_OVERWRITE; dev_dbg(dev, "overwrite progressing.\n"); sec->overwrite_end_time = get_jiffies_64() + 5 * HZ; @@ -1122,7 +1123,8 @@ static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t, if (time_is_before_jiffies64(sec->overwrite_end_time)) { sec->overwrite_end_time = 0; - sec->state = 0; + sec->state = sec->old_state; + sec->old_state = 0; sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; dev_dbg(dev, "overwrite is complete\n"); } else -- GitLab From 00fb67ec6b98114a887d9ef26fc7c3e566e7f665 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Sun, 7 Apr 2019 13:23:39 +0800 Subject: [PATCH 0794/1861] RDMA/hns: Bugfix for SCC hem free The method of hem free for SCC context is different from qp context. In the current version, if free SCC hem during the execution of qp free, there may be smmu error as below: arm-smmu-v3 arm-smmu-v3.1.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.1.auto: 0x00007d0000000010 arm-smmu-v3 arm-smmu-v3.1.auto: 0x000012000000017c arm-smmu-v3 arm-smmu-v3.1.auto: 0x00000000000009e0 arm-smmu-v3 arm-smmu-v3.1.auto: 0x0000000000000000 As SCC context is still used by hardware after qp free, we can solve this problem by removing SCC hem free from hns_roce_qp_free. Fixes: 6a157f7d1b14 ("RDMA/hns: Add SCC context allocation support for hip08") Signed-off-by: Yangyang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 57c76eafef2f8..66cdf625534ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -274,9 +274,6 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) wait_for_completion(&hr_qp->free); if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { - if (hr_dev->caps.sccc_entry_sz) - hns_roce_table_put(hr_dev, &qp_table->sccc_table, - hr_qp->qpn); if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); -- GitLab From e6abc8caa6deb14be2a206253f7e1c5e37e9515b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2019 08:54:37 -0700 Subject: [PATCH 0795/1861] nfsd: Don't release the callback slot unless it was actually held If there are multiple callbacks queued, waiting for the callback slot when the callback gets shut down, then they all currently end up acting as if they hold the slot, and call nfsd4_cb_sequence_done() resulting in interesting side-effects. 
In addition, the 'retry_nowait' path in nfsd4_cb_sequence_done() causes a loop back to nfsd4_cb_prepare() without first freeing the slot, which causes a deadlock when nfsd41_cb_get_slot() gets called a second time. This patch therefore adds a boolean to track whether or not the callback did pick up the slot, so that it can do the right thing in these 2 cases. Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 8 +++++++- fs/nfsd/state.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d219159b98afc..7caa3801ce72b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1010,8 +1010,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) cb->cb_seq_status = 1; cb->cb_status = 0; if (minorversion) { - if (!nfsd41_cb_get_slot(clp, task)) + if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task)) return; + cb->cb_holds_slot = true; } rpc_call_start(task); } @@ -1038,6 +1039,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback return true; } + if (!cb->cb_holds_slot) + goto need_restart; + switch (cb->cb_seq_status) { case 0: /* @@ -1076,6 +1080,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback cb->cb_seq_status); } + cb->cb_holds_slot = false; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, @@ -1283,6 +1288,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_seq_status = 1; cb->cb_status = 0; cb->cb_need_restart = false; + cb->cb_holds_slot = false; } void nfsd4_run_cb(struct nfsd4_callback *cb) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 396c76755b03b..9d6cb246c6c55 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -70,6 +70,7 @@ struct nfsd4_callback { int cb_seq_status; int cb_status; bool cb_need_restart; + bool cb_holds_slot; }; struct nfsd4_callback_ops { -- GitLab From 704236672edacf353c362bab70c3d3eda7bb4a51 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Apr 2019 10:48:50 -0600 Subject: [PATCH 0796/1861] tools/io_uring: remove IOCQE_FLAG_CACHEHIT This ended up not being included in the mainline version of io_uring, so drop it from the test app as well. Signed-off-by: Jens Axboe --- tools/io_uring/io_uring-bench.c | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 512306a37531d..0f257139b003e 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -32,10 +32,6 @@ #include "liburing.h" #include "barrier.h" -#ifndef IOCQE_FLAG_CACHEHIT -#define IOCQE_FLAG_CACHEHIT (1U << 0) -#endif - #define min(a, b) ((a < b) ? 
(a) : (b)) struct io_sq_ring { @@ -85,7 +81,6 @@ struct submitter { unsigned long reaps; unsigned long done; unsigned long calls; - unsigned long cachehit, cachemiss; volatile int finish; __s32 *fds; @@ -270,10 +265,6 @@ static int reap_events(struct submitter *s) return -1; } } - if (cqe->flags & IOCQE_FLAG_CACHEHIT) - s->cachehit++; - else - s->cachemiss++; reaped++; head++; } while (1); @@ -489,7 +480,7 @@ static void file_depths(char *buf) int main(int argc, char *argv[]) { struct submitter *s = &submitters[0]; - unsigned long done, calls, reap, cache_hit, cache_miss; + unsigned long done, calls, reap; int err, i, flags, fd; char *fdepths; void *ret; @@ -569,44 +560,29 @@ int main(int argc, char *argv[]) pthread_create(&s->thread, NULL, submitter_fn, s); fdepths = malloc(8 * s->nr_files); - cache_hit = cache_miss = reap = calls = done = 0; + reap = calls = done = 0; do { unsigned long this_done = 0; unsigned long this_reap = 0; unsigned long this_call = 0; - unsigned long this_cache_hit = 0; - unsigned long this_cache_miss = 0; unsigned long rpc = 0, ipc = 0; - double hit = 0.0; sleep(1); this_done += s->done; this_call += s->calls; this_reap += s->reaps; - this_cache_hit += s->cachehit; - this_cache_miss += s->cachemiss; - if (this_cache_hit && this_cache_miss) { - unsigned long hits, total; - - hits = this_cache_hit - cache_hit; - total = hits + this_cache_miss - cache_miss; - hit = (double) hits / (double) total; - hit *= 100.0; - } if (this_call - calls) { rpc = (this_done - done) / (this_call - calls); ipc = (this_reap - reap) / (this_call - calls); } else rpc = ipc = -1; file_depths(fdepths); - printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n", + printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n", this_done - done, rpc, ipc, s->inflight, - fdepths, hit); + fdepths); done = this_done; calls = this_call; reap = this_reap; - cache_hit = s->cachehit; - cache_miss = s->cachemiss; } while (!finish); pthread_join(s->thread, &ret); -- GitLab From 3ec482d15cb986bf08b923f9193eeddb3b9ca69f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Apr 2019 10:51:01 -0600 Subject: [PATCH 0797/1861] io_uring: restrict IORING_SETUP_SQPOLL to root This options spawns a kernel side thread that will poll for submissions (and completions, if IORING_SETUP_IOPOLL is set). As this allows a user to potentially use more cycles outside of the normal hierarchy, restrict the use of this feature to root. Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 07d6ef195d05a..89aa8412b5f59 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2245,6 +2245,10 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, goto err; if (ctx->flags & IORING_SETUP_SQPOLL) { + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto err; + if (p->flags & IORING_SETUP_SQ_AFF) { int cpu; -- GitLab From 1e6f5440a6814d28c32d347f338bfef68bc3e69d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 17:56:34 +0100 Subject: [PATCH 0798/1861] arm64: backtrace: Don't bother trying to unwind the userspace stack Calling dump_backtrace() with a pt_regs argument corresponding to userspace doesn't make any sense and our unwinder will simply print "Call trace:" before unwinding the stack looking for user frames. Rather than go through this song and dance, just return early if we're passed a user register state. 
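For illustration, a standalone sketch of the early-return shape; pt_regs and user_mode() are simplified stand-ins, not the arm64 definitions.

#include <stdbool.h>
#include <stdio.h>

struct pt_regs { bool user; };	/* simplified stand-in */

static bool user_mode(const struct pt_regs *regs) { return regs->user; }

static void dump_backtrace(const struct pt_regs *regs)
{
	int skip = 0;

	if (regs) {
		if (user_mode(regs))
			return;	/* nothing sensible to unwind for user regs */
		skip = 1;	/* skip frames up to the exception entry point */
	}
	printf("Call trace: (skip=%d)\n", skip);
}

int main(void)
{
	struct pt_regs user = { .user = true }, kernel = { .user = false };

	dump_backtrace(&user);		/* prints nothing */
	dump_backtrace(&kernel);	/* prints the header */
	return 0;
}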
Cc: Fixes: 1149aad10b1e ("arm64: Add dump_backtrace() in show_regs") Reported-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8ad119c3f665d..29755989f616c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -102,10 +102,16 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - int skip; + int skip = 0; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + if (regs) { + if (user_mode(regs)) + return; + skip = 1; + } + if (!tsk) tsk = current; @@ -126,7 +132,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.graph = 0; #endif - skip = !!regs; printk("Call trace:\n"); do { /* skip until specified stack frame */ @@ -176,15 +181,13 @@ static int __die(const char *str, int err, struct pt_regs *regs) return ret; print_modules(); - __show_regs(regs); pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk)); + show_regs(regs); - if (!user_mode(regs)) { - dump_backtrace(regs, tsk); + if (!user_mode(regs)) dump_instr(KERN_EMERG, regs); - } return ret; } -- GitLab From b1a6e8f9131381a92bfdacdf86ef80cca82f71d4 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Mon, 8 Apr 2019 18:08:04 +0200 Subject: [PATCH 0799/1861] MAINTAINERS: ieee802154: update documentation file pattern When moving the documentation for the ieee802154 subsystem from plain text to rst the file pattern in the MAINTAINERS file got wrong. Updating it here to fix scripts using this file. Reported-by: Joe Perches Signed-off-by: Stefan Schmidt Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6771bd784f5f5..6216221388600 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7515,7 +7515,7 @@ F: include/net/mac802154.h F: include/net/af_ieee802154.h F: include/net/cfg802154.h F: include/net/ieee802154_netdev.h -F: Documentation/networking/ieee802154.txt +F: Documentation/networking/ieee802154.rst IFE PROTOCOL M: Yotam Gigi -- GitLab From cf94db21905333e610e479688add629397a4b384 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 8 Apr 2019 14:33:22 +0200 Subject: [PATCH 0800/1861] virtio: Honour 'may_reduce_num' in vring_create_virtqueue vring_create_virtqueue() allows the caller to specify via the may_reduce_num parameter whether the vring code is allowed to allocate a smaller ring than specified. However, the split ring allocation code tries to allocate a smaller ring on allocation failure regardless of what the caller specified. This may cause trouble for e.g. virtio-pci in legacy mode, which does not support ring resizing. (The packed ring code does not resize in any case.) Let's fix this by bailing out immediately in the split ring code if the requested size cannot be allocated and may_reduce_num has not been specified. While at it, fix a typo in the usage instructions. Fixes: 2a2d1382fe9d ("virtio: Add improved queue allocation API") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Cornelia Huck Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Halil Pasic Reviewed-by: Jens Freimann --- drivers/virtio/virtio_ring.c | 2 ++ include/linux/virtio_ring.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 18846afb39da1..5df92c308286d 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -882,6 +882,8 @@ static struct virtqueue *vring_create_virtqueue_split( GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); if (queue) break; + if (!may_reduce_num) + return NULL; } if (!num) diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index fab02133a9197..3dc70adfe5f5e 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -63,7 +63,7 @@ struct virtqueue; /* * Creates a virtqueue and allocates the descriptor ring. If * may_reduce_num is set, then this may allocate a smaller ring than - * expected. The caller should query virtqueue_get_ring_size to learn + * expected. The caller should query virtqueue_get_vring_size to learn * the actual size of the ring. */ struct virtqueue *vring_create_virtqueue(unsigned int index, -- GitLab From b75bb8a5b755d0c7bf1ac071e4df2349a7644a1e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 5 Apr 2019 20:46:46 +0200 Subject: [PATCH 0801/1861] r8169: disable ASPM again There's a significant number of reports that re-enabling ASPM causes different issues, ranging from decreased performance to system not booting at all. This affects only a minority of users, but the number of affected users is big enough that we better switch off ASPM again. This will hurt notebook users who are not affected by the issues; they may see decreased battery runtime w/o ASPM. Adding generic sysfs attributes to control ASPM is being discussed with the PCI core folks. Once this is in place, brave enough users can re-enable ASPM on their system. Fixes: a99790bf5c7f ("r8169: Reinstate ASPM Support") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 19efa88f3f02d..ed651dde6ef9e 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -7352,6 +7353,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; + /* Disable ASPM completely as that cause random device stop working + * problems as well as full system hangs for some PCIe devices users. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); if (rc < 0) { -- GitLab From e891db1a18bf11e02533ec2386b796cfd8d60666 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 22 Mar 2019 12:51:20 +0200 Subject: [PATCH 0802/1861] tpm: turn on TPM on suspend for TPM 1.x tpm_chip_start/stop() should also be called for TPM 1.x devices on suspend. Add that functionality back. Do not lock the chip because it is unnecessary, as there are no multiple threads using it when doing the suspend.
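For illustration, the start/stop pairing the patch restores can be sketched standalone, with both TPM versions sharing a single power-down path; every name here is a simplified stand-in, not the TPM driver API.

#include <stdio.h>

struct chip { int is_tpm2; };	/* hypothetical chip handle */

static int chip_start(struct chip *c) { (void)c; return 0; /* 0 on success */ }
static void chip_stop(struct chip *c) { (void)c; printf("chip stopped\n"); }

static int pm_suspend(struct chip *c)
{
	int rc = 0;

	if (!chip_start(c)) {		/* power up for either TPM version */
		if (c->is_tpm2)
			printf("tpm2 shutdown\n");
		else
			printf("tpm1 save state\n");
		chip_stop(c);		/* one stop path covers both branches */
	}
	return rc;
}

int main(void)
{
	struct chip tpm1 = { 0 }, tpm2 = { 1 };

	pm_suspend(&tpm1);
	pm_suspend(&tpm2);
	return 0;
}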
Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Reported-by: Paul Zimmerman Signed-off-by: Jarkko Sakkinen Tested-by: Domenico Andreoli Signed-off-by: James Morris --- drivers/char/tpm/tpm-interface.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 83ece5639f863..ae1030c9b086d 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -402,15 +402,13 @@ int tpm_pm_suspend(struct device *dev) if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED) return 0; - if (chip->flags & TPM_CHIP_FLAG_TPM2) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + if (!tpm_chip_start(chip)) { + if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); - tpm_chip_stop(chip); - } - mutex_unlock(&chip->tpm_mutex); - } else { - rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + else + rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + + tpm_chip_stop(chip); } return rc; -- GitLab From 7110629263469b4664d00b38ef80a656eddf3637 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 27 Mar 2019 11:32:38 -0700 Subject: [PATCH 0803/1861] tpm: fix an invalid condition in tpm_common_poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The poll condition should only check response_length, because reads should only be issued if there is data to read. The response_read flag only prevents double writes. The problem was that the write set the response_read to false, enqueued a tpm job, and returned. Then the application called poll, which checked the response_read flag and returned EPOLLIN. Then the application called read, but got nothing. After all that, the async_work kicked in. Also added a mutex_lock around the poll check to prevent other possible race conditions. Fixes: 9488585b21bef0df12 ("tpm: add support for partial reads") Reported-by: Mantas Mikulėnas Tested-by: Mantas Mikulėnas Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm-dev-common.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 8856cce5a23b2..817ae09a369ec 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -233,12 +233,19 @@ __poll_t tpm_common_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &priv->async_wait, wait); + mutex_lock(&priv->buffer_mutex); - if (!priv->response_read || priv->response_length) + /* + * The response_length indicates if there is still response + * (or part of it) to be consumed. Partial reads decrease it + * by the number of bytes read, and write resets it the zero. + */ + if (priv->response_length) mask = EPOLLIN | EPOLLRDNORM; else mask = EPOLLOUT | EPOLLWRNORM; + mutex_unlock(&priv->buffer_mutex); return mask; } -- GitLab From c78719203fc629421a0d91d3d22240c36ae0119c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 25 Mar 2019 16:43:10 +0200 Subject: [PATCH 0804/1861] KEYS: trusted: allow trusted.ko to initialize w/o a TPM Allow trusted.ko to initialize w/o a TPM. This commit also adds checks to the exported functions to fail when a TPM is not available.
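For illustration, the pattern can be sketched standalone: module init reports success without a device, while each exported operation fails cleanly; the names are simplified stand-ins, not the trusted.ko interfaces.

#include <errno.h>
#include <stdio.h>

static void *chip;	/* NULL when no TPM is present */

static int trusted_send(const unsigned char *cmd, size_t buflen)
{
	if (!chip)
		return -ENODEV;	/* fail the operation, not the module load */
	(void)cmd; (void)buflen;
	return 0;
}

static int init_trusted(void)
{
	if (!chip)
		return 0;	/* no TPM: succeed so dependent modules still load */
	return 0;		/* normal initialization path */
}

int main(void)
{
	unsigned char cmd[4] = { 0 };

	printf("init=%d send=%d\n", init_trusted(), trusted_send(cmd, sizeof(cmd)));
	return 0;
}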
Fixes: 240730437deb ("KEYS: trusted: explicitly use tpm_chip structure...") Cc: James Morris Reported-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- security/keys/trusted.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/security/keys/trusted.c b/security/keys/trusted.c index bcc9c6ead7fd3..a284d790de8c3 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -135,6 +135,9 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, int ret; va_list argp; + if (!chip) + return -ENODEV; + sdesc = init_sdesc(hashalg); if (IS_ERR(sdesc)) { pr_info("trusted_key: can't alloc %s\n", hash_alg); @@ -196,6 +199,9 @@ int TSS_checkhmac1(unsigned char *buffer, va_list argp; int ret; + if (!chip) + return -ENODEV; + bufsize = LOAD32(buffer, TPM_SIZE_OFFSET); tag = LOAD16(buffer, 0); ordinal = command; @@ -363,6 +369,9 @@ int trusted_tpm_send(unsigned char *cmd, size_t buflen) { int rc; + if (!chip) + return -ENODEV; + dump_tpm_buf(cmd); rc = tpm_send(chip, cmd, buflen); dump_tpm_buf(cmd); @@ -429,6 +438,9 @@ int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce) { int ret; + if (!chip) + return -ENODEV; + INIT_BUF(tb); store16(tb, TPM_TAG_RQU_COMMAND); store32(tb, TPM_OIAP_SIZE); @@ -1245,9 +1257,13 @@ static int __init init_trusted(void) { int ret; + /* encrypted_keys.ko depends on successful load of this module even if + * TPM is not used. + */ chip = tpm_default_chip(); if (!chip) - return -ENOENT; + return 0; + ret = init_digests(); if (ret < 0) goto err_put; @@ -1269,10 +1285,12 @@ err_put: static void __exit cleanup_trusted(void) { - put_device(&chip->dev); - kfree(digests); - trusted_shash_release(); - unregister_key_type(&key_type_trusted); + if (chip) { + put_device(&chip->dev); + kfree(digests); + trusted_shash_release(); + unregister_key_type(&key_type_trusted); + } } late_initcall(init_trusted); -- GitLab From b9d0a85d6b2e76630cfd4c475ee3af4109bfd87a Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 20 Feb 2019 16:25:38 +0800 Subject: [PATCH 0805/1861] tpm: Fix the type of the return value in calc_tpm2_event_size() calc_tpm2_event_size() has an invalid signature because it returns a 'size_t' where as its signature says that it returns 'int'. Cc: Fixes: 4d23cc323cdb ("tpm: add securityfs support for TPM 2.0 firmware event log") Suggested-by: Jarkko Sakkinen Signed-off-by: Yue Haibing Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/eventlog/tpm2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c index d8b77133a83a2..f824563fc28dd 100644 --- a/drivers/char/tpm/eventlog/tpm2.c +++ b/drivers/char/tpm/eventlog/tpm2.c @@ -37,8 +37,8 @@ * * Returns size of the event. If it is an invalid event, returns 0. 
*/ -static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event, - struct tcg_pcr_event *event_header) +static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event, + struct tcg_pcr_event *event_header) { struct tcg_efi_specid_event_head *efispecid; struct tcg_event_field *event_field; -- GitLab From be24b37e22c20cbaa891971616784dd0f35211e8 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 22 Oct 2018 16:43:57 -0700 Subject: [PATCH 0806/1861] KEYS: trusted: fix -Wvarags warning Fixes the warning reported by Clang: security/keys/trusted.c:146:17: warning: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Wvarargs] va_start(argp, h3); ^ security/keys/trusted.c:126:37: note: parameter of type 'unsigned char' is declared here unsigned char *h2, unsigned char h3, ...) ^ Specifically, it seems that both the C90 (4.8.1.1) and C11 (7.16.1.4) standards explicitly call this out as undefined behavior: The parameter parmN is the identifier of the rightmost parameter in the variable parameter list in the function definition (the one just before the ...). If the parameter parmN is declared with ... or with a type that is not compatible with the type that results after application of the default argument promotions, the behavior is undefined. Link: https://github.com/ClangBuiltLinux/linux/issues/41 Link: https://www.eskimo.com/~scs/cclass/int/sx11c.html Suggested-by: David Laight Suggested-by: Denis Kenzior Suggested-by: James Bottomley Suggested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- include/keys/trusted.h | 2 +- security/keys/trusted.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/keys/trusted.h b/include/keys/trusted.h index adbcb68178260..0071298b9b28e 100644 --- a/include/keys/trusted.h +++ b/include/keys/trusted.h @@ -38,7 +38,7 @@ enum { int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...); + unsigned char *h2, unsigned int h3, ...); int TSS_checkhmac1(unsigned char *buffer, const uint32_t command, const unsigned char *ononce, diff --git a/security/keys/trusted.c b/security/keys/trusted.c index a284d790de8c3..efdbf17f39152 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -125,7 +125,7 @@ out: */ int TSS_authhmac(unsigned char *digest, const unsigned char *key, unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned char h3, ...) + unsigned char *h2, unsigned int h3, ...) { unsigned char paramdigest[SHA1_DIGEST_SIZE]; struct sdesc *sdesc; @@ -144,7 +144,7 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key, return PTR_ERR(sdesc); } - c = h3; + c = !!h3; ret = crypto_shash_init(&sdesc->shash); if (ret < 0) goto out; -- GitLab From f1a0ba6cccff75d882204cae1f154f17620b3c4a Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 12 Feb 2019 15:42:10 -0800 Subject: [PATCH 0807/1861] selftests/tpm2: Extend tests to cover partial reads Three new tests added: 1. Send get random cmd, read header in 1st read, read the rest in second read - expect success 2. Send get random cmd, read only part of the response, send another get random command, read the response - expect success 3. 
Send get random cmd followed by another get random cmd, without reading the first response - expect the second cmd to fail with -EBUSY Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- tools/testing/selftests/tpm2/tpm2.py | 1 + tools/testing/selftests/tpm2/tpm2_tests.py | 63 ++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 40ea95ce2eada..6fc99ce025b59 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -22,6 +22,7 @@ TPM2_CC_UNSEAL = 0x015E TPM2_CC_FLUSH_CONTEXT = 0x0165 TPM2_CC_START_AUTH_SESSION = 0x0176 TPM2_CC_GET_CAPABILITY = 0x017A +TPM2_CC_GET_RANDOM = 0x017B TPM2_CC_PCR_READ = 0x017E TPM2_CC_POLICY_PCR = 0x017F TPM2_CC_PCR_EXTEND = 0x0182 diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 3bb066fea4a01..d4973be534932 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -158,6 +158,69 @@ class SmokeTest(unittest.TestCase): pass self.assertEqual(rejected, True) + def test_read_partial_resp(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + hdr = self.client.tpm.read(10) + sz = struct.unpack('>I', hdr[2:6])[0] + rsp = self.client.tpm.read() + except: + pass + self.assertEqual(sz, 10 + 2 + 32) + self.assertEqual(len(rsp), 2 + 32) + + def test_read_partial_overwrite(self): + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + # Read part of the respone + rsp1 = self.client.tpm.read(15) + + # Send a new cmd + self.client.tpm.write(cmd) + + # Read the whole respone + rsp2 = self.client.tpm.read() + except: + pass + self.assertEqual(len(rsp1), 15) + self.assertEqual(len(rsp2), 10 + 2 + 32) + + def test_send_two_cmds(self): + rejected = False + try: + fmt = '>HIIH' + cmd = struct.pack(fmt, + tpm2.TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + tpm2.TPM2_CC_GET_RANDOM, + 0x20) + self.client.tpm.write(cmd) + + # expect the second one to raise -EBUSY error + self.client.tpm.write(cmd) + rsp = self.client.tpm.read() + + except IOError, e: + # read the response + rsp = self.client.tpm.read() + rejected = True + pass + except: + pass + self.assertEqual(rejected, True) + class SpaceTest(unittest.TestCase): def setUp(self): logging.basicConfig(filename='SpaceTest.log', level=logging.DEBUG) -- GitLab From 6da70580af9612accf042b37564d73e787af39b4 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 12 Feb 2019 15:42:05 -0800 Subject: [PATCH 0808/1861] selftests/tpm2: Open tpm dev in unbuffered mode In order to have control over how many bytes are read or written the device needs to be opened in unbuffered mode. 
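The same concern can be sketched in C rather than Python: buffered stdio may read ahead past the bytes actually requested, so byte-exact device I/O needs buffering disabled. A generic sketch; the device path is arbitrary and unrelated to the selftests.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/dev/urandom", "rb");	/* any device node illustrates the idea */
	unsigned char hdr[10];
	size_t n;

	if (!f)
		return 1;
	setvbuf(f, NULL, _IONBF, 0);	/* no read-ahead: each fread hits the device */

	n = fread(hdr, 1, sizeof(hdr), f);	/* consumes exactly up to 10 bytes */
	printf("read %zu bytes\n", n);
	fclose(f);
	return 0;
}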
Signed-off-by: Tadeusz Struk Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- tools/testing/selftests/tpm2/tpm2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 6fc99ce025b59..828c185846248 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -358,9 +358,9 @@ class Client: self.flags = flags if (self.flags & Client.FLAG_SPACE) == 0: - self.tpm = open('/dev/tpm0', 'r+b') + self.tpm = open('/dev/tpm0', 'r+b', buffering=0) else: - self.tpm = open('/dev/tpmrm0', 'r+b') + self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0) def close(self): self.tpm.close() -- GitLab From 492b67e28ee5f2a2522fb72e3d3bcb990e461514 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 6 Apr 2019 17:16:52 +0200 Subject: [PATCH 0809/1861] net: ip_gre: fix possible use-after-free in erspan_rcv erspan tunnels run __iptunnel_pull_header on received skbs to remove gre and erspan headers. This can determine a possible use-after-free accessing pkt_md pointer in erspan_rcv since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device). Fix it resetting pkt_md pointer after __iptunnel_pull_header Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fd219f7bd3ea2..4b05264414763 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct net *net = dev_net(skb->dev); struct metadata_dst *tun_dst = NULL; struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; struct ip_tunnel_net *itn; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -282,9 +281,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), @@ -292,8 +288,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, goto drop; if (tunnel->collect_md) { + struct erspan_metadata *pkt_md, *md; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -306,6 +303,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); md = ip_tunnel_info_opts(&tun_dst->u.tun_info); md->version = ver; md2 = &md->u.md2; -- GitLab From 2a3cabae4536edbcb21d344e7aa8be7a584d2afb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 6 Apr 2019 17:16:53 +0200 Subject: [PATCH 0810/1861] net: ip6_gre: fix possible use-after-free in ip6erspan_rcv erspan_v6 tunnels run __iptunnel_pull_header on received skbs to remove erspan header. 
This can determine a possible use-after-free accessing pkt_md pointer in ip6erspan_rcv since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb (e.g if the packet has been sent though a veth device). Fix it resetting pkt_md pointer after __iptunnel_pull_header Fixes: 1d7e2ed22f8d ("net: erspan: refactor existing erspan code") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b32c95f021280..655e46b227f9e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -525,10 +525,10 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) } static int ip6erspan_rcv(struct sk_buff *skb, - struct tnl_ptk_info *tpi) + struct tnl_ptk_info *tpi, + int gre_hdr_len) { struct erspan_base_hdr *ershdr; - struct erspan_metadata *pkt_md; const struct ipv6hdr *ipv6h; struct erspan_md2 *md2; struct ip6_tnl *tunnel; @@ -547,18 +547,16 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, len))) return PACKET_REJECT; - ershdr = (struct erspan_base_hdr *)skb->data; - pkt_md = (struct erspan_metadata *)(ershdr + 1); - if (__iptunnel_pull_header(skb, len, htons(ETH_P_TEB), false, false) < 0) return PACKET_REJECT; if (tunnel->parms.collect_md) { + struct erspan_metadata *pkt_md, *md; struct metadata_dst *tun_dst; struct ip_tunnel_info *info; - struct erspan_metadata *md; + unsigned char *gh; __be64 tun_id; __be16 flags; @@ -571,6 +569,14 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (!tun_dst) return PACKET_REJECT; + /* skb can be uncloned in __iptunnel_pull_header, so + * old pkt_md is no longer valid and we need to reset + * it + */ + gh = skb_network_header(skb) + + skb_network_header_len(skb); + pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + + sizeof(*ershdr)); info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); md->version = ver; @@ -607,7 +613,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; goto out; } -- GitLab From afe64245af9f58267e7fa8fb76ad5650ee7ec25f Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:45 -0400 Subject: [PATCH 0811/1861] ethtool: avoid signed-unsigned comparison in ethtool_validate_speed() When building C++ userspace code that includes ethtool.h with "-Werror -Wall", g++ complains about signed-unsigned comparison in ethtool_validate_speed() due to definition of SPEED_UNKNOWN as -1. Explicitly cast SPEED_UNKNOWN to __u32 to match type of ethtool_validate_speed() argument. Signed-off-by: Michael Zhivich Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3652b239dad1d..d473e5ed044c7 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1591,7 +1591,7 @@ enum ethtool_link_mode_bit_indices { static inline int ethtool_validate_speed(__u32 speed) { - return speed <= INT_MAX || speed == SPEED_UNKNOWN; + return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN; } /* Duplex, half or full. 
*/ -- GitLab From caf2c5205d82ff0d758096a69a7e0466c38d4dbb Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:46 -0400 Subject: [PATCH 0812/1861] broadcom: tg3: fix use of SPEED_UNKNOWN ethtool constant tg3 driver uses u16 to store SPEED_UKNOWN ethtool constant, which is defined as -1, resulting in value truncation and thus incorrect test results against SPEED_UNKNOWN. For example, the following test will print "False": u16 speed = SPEED_UNKNOWN; if (speed == SPEED_UNKNOWN) printf("True"); else printf("False"); Change storage of speed to use u32 to avoid this issue. Signed-off-by: Michael Zhivich Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- drivers/net/ethernet/broadcom/tg3.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 328373e0578ff..060a6f386104a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4283,7 +4283,7 @@ static void tg3_power_down(struct tg3 *tp) pci_set_power_state(tp->pdev, PCI_D3hot); } -static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex) +static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u32 *speed, u8 *duplex) { switch (val & MII_TG3_AUX_STAT_SPDMASK) { case MII_TG3_AUX_STAT_10HALF: @@ -4787,7 +4787,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, bool force_reset) bool current_link_up; u32 bmsr, val; u32 lcl_adv, rmt_adv; - u16 current_speed; + u32 current_speed; u8 current_duplex; int i, err; @@ -5719,7 +5719,7 @@ out: static int tg3_setup_fiber_phy(struct tg3 *tp, bool force_reset) { u32 orig_pause_cfg; - u16 orig_active_speed; + u32 orig_active_speed; u8 orig_active_duplex; u32 mac_status; bool current_link_up; @@ -5823,7 +5823,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) { int err = 0; u32 bmsr, bmcr; - u16 current_speed = SPEED_UNKNOWN; + u32 current_speed = SPEED_UNKNOWN; u8 current_duplex = DUPLEX_UNKNOWN; bool current_link_up = false; u32 local_adv, remote_adv, sgsr; diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index a772a33b685c5..6953d0546acb3 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2873,7 +2873,7 @@ struct tg3_tx_ring_info { struct tg3_link_config { /* Describes what we're trying to get. */ u32 advertising; - u16 speed; + u32 speed; u8 duplex; u8 autoneg; u8 flowctrl; @@ -2882,7 +2882,7 @@ struct tg3_link_config { u8 active_flowctrl; u8 active_duplex; - u16 active_speed; + u32 active_speed; u32 rmt_adv; }; -- GitLab From d63da85a4226c4b5a28536a1f48d89eefd50a832 Mon Sep 17 00:00:00 2001 From: Michael Zhivich Date: Mon, 8 Apr 2019 10:48:47 -0400 Subject: [PATCH 0813/1861] qlogic: qlcnic: fix use of SPEED_UNKNOWN ethtool constant qlcnic driver uses u16 to store SPEED_UKNOWN ethtool constant, which is defined as -1, resulting in value truncation and thus incorrect test results against SPEED_UNKNOWN. For example, the following test will print "False": u16 speed = SPEED_UNKNOWN; if (speed == SPEED_UNKNOWN) printf("True"); else printf("False"); Change storage of speed to use u32 to avoid this issue. Signed-off-by: Michael Zhivich Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 0c443ea98479a..374a4d4371f99 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -497,7 +497,7 @@ struct qlcnic_hardware_context { u16 board_type; u16 supported_type; - u16 link_speed; + u32 link_speed; u16 link_duplex; u16 link_autoneg; u16 module_type; -- GitLab From a62520473f15750cd1432d36b377a06cd7cff8d2 Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Mon, 8 Apr 2019 15:37:54 -0400 Subject: [PATCH 0814/1861] net: macb driver, check for SKBTX_HW_TSTAMP Make sure SKBTX_HW_TSTAMP (i.e. SOF_TIMESTAMPING_TX_HARDWARE) has been enabled for this skb. It does fix the issue where normal socks that aren't expecting a timestamp will not wake up on select, but when a user does want a SOF_TIMESTAMPING_TX_HARDWARE it does work. Signed-off-by: Paul Thomas Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1522aee81884b..3da2795e24863 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -898,7 +898,9 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { - if (gem_ptp_do_txstamp(queue, skb, desc) == 0) { + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP) && + gem_ptp_do_txstamp(queue, skb, desc) == 0) { /* skb now belongs to timestamp buffer * and will be removed later */ -- GitLab From a1b0e4e684e9c300b9e759b46cb7a0147e61ddff Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Apr 2019 17:39:54 -0400 Subject: [PATCH 0815/1861] bnxt_en: Improve RX consumer index validity check. There is logic to check that the RX/TPA consumer index is the expected index to work around a hardware problem. However, the potentially bad consumer index is first used to index into an array to reference an entry. This can potentially crash if the bad consumer index is beyond legal range. Improve the logic to use the consumer index for dereferencing after the validity check and log an error message. Fixes: fa7e28127a5a ("bnxt_en: Add workaround to detect bad opaque in rx completion (part 2)") Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0bb9d7b3a2b62..3df847b7079f9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1133,6 +1133,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, tpa_info = &rxr->rx_tpa[agg_id]; if (unlikely(cons != rxr->rx_next_cons)) { + netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return; } @@ -1585,15 +1587,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } cons = rxcmp->rx_cmp_opaque; - rx_buf = &rxr->rx_buf_ring[cons]; - data = rx_buf->data; - data_ptr = rx_buf->data_ptr; if (unlikely(cons != rxr->rx_next_cons)) { int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp); + netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", + cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); return rc1; } + rx_buf = &rxr->rx_buf_ring[cons]; + data = rx_buf->data; + data_ptr = rx_buf->data_ptr; prefetch(data_ptr); misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1); -- GitLab From 8e44e96c6c8e8fb80b84a2ca11798a8554f710f2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 8 Apr 2019 17:39:55 -0400 Subject: [PATCH 0816/1861] bnxt_en: Reset device on RX buffer errors. If the RX completion indicates RX buffers errors, the RX ring will be disabled by firmware and no packets will be received on that ring from that point on. Recover by resetting the device. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3df847b7079f9..4c586ba4364ba 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1614,11 +1614,17 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rx_buf->data = NULL; if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) { + u32 rx_err = le32_to_cpu(rxcmp1->rx_cmp_cfa_code_errors_v2); + bnxt_reuse_rx_data(rxr, cons, data); if (agg_bufs) bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); rc = -EIO; + if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { + netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); + bnxt_sched_reset(bp, rxr); + } goto next_rx; } -- GitLab From 5c2442fd78998af60e13aba506d103f7f43f8701 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 5 Apr 2019 20:39:13 +0530 Subject: [PATCH 0817/1861] scsi: csiostor: fix missing data copy in csio_scsi_err_handler() If scsi cmd sglist is not suitable for DDP then csiostor driver uses preallocated buffers for DDP, because of this data copy is required from DDP buffer to scsi cmd sglist before calling ->scsi_done(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. 
Petersen --- drivers/scsi/csiostor/csio_scsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 462560b2855e2..469d0bc9f5fe4 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1713,8 +1713,11 @@ csio_scsi_err_handler(struct csio_hw *hw, struct csio_ioreq *req) } out: - if (req->nsge > 0) + if (req->nsge > 0) { scsi_dma_unmap(cmnd); + if (req->dcopy && (host_status == DID_OK)) + host_status = csio_scsi_copy_to_sgl(hw, req); + } cmnd->result = (((host_status) << 16) | scsi_status); cmnd->scsi_done(cmnd); -- GitLab From bef42cb2029c0ec8cd8c9e0545589d81913dec4e Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Sun, 7 Apr 2019 15:46:55 +0300 Subject: [PATCH 0818/1861] drm/i915: Get power refs in encoder->get_power_domains() Push getting the reference for the encoders' power domains into the encoder get_power_domains() hook instead of doing this from the caller. This way the encoder can store away the corresponding wakerefs. This fixes the DSI encoder disabling, which didn't release these power references it acquired during HW state readout. Note that longtime ownership for the corresponding wakerefs can be thus acquired / released in two ways. Nevertheless there is always only one owner for them: After HW readout (booting/system resume): - encoder->get_power_domains() acquires - encoder->disable*() releases After a modeset (calling intel_atomic_commit()): - encoder->enable*() acquires - encoder->disable*() releases * can be any of the encoder enable/disable hooks. v2: - Check that the DSI io_wakerefs are unset both during encoder HW readout and enabling. (Chris) Fixes: 0e6e0be4c9523 ("drm/i915: Markup paired operations on display power domains") Cc: Vandita Kulkarni Cc: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190407124655.31536-1-imre.deak@intel.com (cherry picked from commit 3a52fb7e7953f0b13df8c05d0d74b56a66888f30) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/icl_dsi.c | 40 ++++++++++++++-------------- drivers/gpu/drm/i915/intel_ddi.c | 17 ++++++------ drivers/gpu/drm/i915/intel_display.c | 6 +---- drivers/gpu/drm/i915/intel_drv.h | 10 ++++--- 4 files changed, 35 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 73a7bee24a663..83cd8284e8077 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -323,6 +323,21 @@ static void gen11_dsi_program_esc_clk_div(struct intel_encoder *encoder) } } +static void get_dsi_io_power_domains(struct drm_i915_private *dev_priv, + struct intel_dsi *intel_dsi) +{ + enum port port; + + for_each_dsi_port(port, intel_dsi->ports) { + WARN_ON(intel_dsi->io_wakeref[port]); + intel_dsi->io_wakeref[port] = + intel_display_power_get(dev_priv, + port == PORT_A ? + POWER_DOMAIN_PORT_DDI_A_IO : + POWER_DOMAIN_PORT_DDI_B_IO); + } +} + static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -336,13 +351,7 @@ static void gen11_dsi_enable_io_power(struct intel_encoder *encoder) I915_WRITE(ICL_DSI_IO_MODECTL(port), tmp); } - for_each_dsi_port(port, intel_dsi->ports) { - intel_dsi->io_wakeref[port] = - intel_display_power_get(dev_priv, - port == PORT_A ? 
- POWER_DOMAIN_PORT_DDI_A_IO : - POWER_DOMAIN_PORT_DDI_B_IO); - } + get_dsi_io_power_domains(dev_priv, intel_dsi); } static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) @@ -1218,20 +1227,11 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, return 0; } -static u64 gen11_dsi_get_power_domains(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) +static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u64 domains = 0; - enum port port; - - for_each_dsi_port(port, intel_dsi->ports) - if (port == PORT_A) - domains |= BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO); - else - domains |= BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO); - - return domains; + get_dsi_io_power_domains(to_i915(encoder->base.dev), + enc_to_intel_dsi(&encoder->base)); } static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 14d580cdefd3e..deba8e90360a6 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2075,12 +2075,11 @@ intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port) intel_aux_power_domain(dig_port); } -static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) +static void intel_ddi_get_power_domains(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port; - u64 domains; /* * TODO: Add support for MST encoders. Atm, the following should never @@ -2088,10 +2087,10 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, * hook. 
*/ if (WARN_ON(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))) - return 0; + return; dig_port = enc_to_dig_port(&encoder->base); - domains = BIT_ULL(dig_port->ddi_io_power_domain); + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); /* * AUX power is only needed for (e)DP mode, and for HDMI mode on TC @@ -2099,15 +2098,15 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder, */ if (intel_crtc_has_dp_encoder(crtc_state) || intel_port_is_tc(dev_priv, encoder->port)) - domains |= BIT_ULL(intel_ddi_main_link_aux_domain(dig_port)); + intel_display_power_get(dev_priv, + intel_ddi_main_link_aux_domain(dig_port)); /* * VDSC power is needed when DSC is enabled */ if (crtc_state->dsc_params.compression_enable) - domains |= BIT_ULL(intel_dsc_power_domain(crtc_state)); - - return domains; + intel_display_power_get(dev_priv, + intel_dsc_power_domain(crtc_state)); } void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ccb616351bba7..421aac80a8381 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15986,8 +15986,6 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) struct intel_encoder *encoder; for_each_intel_encoder(&dev_priv->drm, encoder) { - u64 get_domains; - enum intel_display_power_domain domain; struct intel_crtc_state *crtc_state; if (!encoder->get_power_domains) @@ -16001,9 +15999,7 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) continue; crtc_state = to_intel_crtc_state(encoder->base.crtc->state); - get_domains = encoder->get_power_domains(encoder, crtc_state); - for_each_power_domain(domain, get_domains) - intel_display_power_get(dev_priv, domain); + encoder->get_power_domains(encoder, crtc_state); } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 15db41394b9ed..d5660ac1b0d60 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -270,10 +270,12 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); - /* Returns a mask of power domains that need to be referenced as part - * of the hardware state readout code. */ - u64 (*get_power_domains)(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state); + /* + * Acquires the power domains needed for an active encoder during + * hardware state readout. + */ + void (*get_power_domains)(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and -- GitLab From 2ae2c3316fb77dcf64275d011596b60104c45426 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Thu, 4 Apr 2019 00:04:09 +0800 Subject: [PATCH 0819/1861] drm/mediatek: fix possible object reference leak The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: drivers/gpu/drm/mediatek/mtk_hdmi.c:1521:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1509, but without a corresponding object release within this function. 
drivers/gpu/drm/mediatek/mtk_hdmi.c:1524:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1509, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: CK Hu Cc: Philipp Zabel Cc: David Airlie Cc: Daniel Vetter Cc: Matthias Brugger Cc: dri-devel@lists.freedesktop.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 543a25e5765e4..e04e6c293d39d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1515,6 +1515,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, of_node_put(remote); hdmi->ddc_adpt = of_find_i2c_adapter_by_node(i2c_np); + of_node_put(i2c_np); if (!hdmi->ddc_adpt) { dev_err(dev, "Failed to get ddc i2c adapter by node\n"); return -EINVAL; -- GitLab From 89f98d7e5fa8f7e1ba627537dc8a3cef5add7b04 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 9 Apr 2019 10:25:36 +0800 Subject: [PATCH 0820/1861] cpufreq: Remove cpufreq_driver check in cpufreq_boost_supported() Currently there are three calling paths for cpufreq_boost_supported() in all as below, we can see the cpufreq_driver null check is needless since it is already checked before. cpufreq_enable_boost_support() |-> if (!cpufreq_driver) |-> cpufreq_boost_supported() cpufreq_register_driver() |-> if (!driver_data ... |-> cpufreq_driver = driver_data |-> cpufreq_boost_supported() |-> remove_boost_sysfs_file() |-> cpufreq_boost_supported() cpufreq_unregister_driver() |-> if (!cpufreq_driver ... |-> remove_boost_sysfs_file() |-> cpufreq_boost_supported() Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index f3f79266ab48c..3f235d5f67ee8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2472,7 +2472,7 @@ int cpufreq_boost_trigger_state(int state) static bool cpufreq_boost_supported(void) { - return likely(cpufreq_driver) && cpufreq_driver->set_boost; + return cpufreq_driver->set_boost; } static int create_boost_sysfs_file(void) -- GitLab From c5781ffbbd4f742a58263458145fe7f0ac01d9e0 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Mon, 8 Apr 2019 13:42:26 -0700 Subject: [PATCH 0821/1861] ACPICA: Namespace: remove address node from global list after method termination ACPICA commit b233720031a480abd438f2e9c643080929d144c3 ASL operation_regions declare a range of addresses that it uses. In a perfect world, the range of addresses should be used exclusively by the AML interpreter. The OS can use this information to decide which drivers to load so that the AML interpreter and device drivers use different regions of memory. During table load, the address information is added to a global address range list. Each node in this list contains an address range as well as a namespace node of the operation_region. This list is deleted at ACPI shutdown. Unfortunately, ASL operation_regions can be declared inside of control methods. Although this is not recommended, modern firmware contains such code. New module level code changes unintentionally removed the functionality of adding and removing nodes to the global address range list. 
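For context, the add/remove pairing at issue looks roughly like this (a simplified sketch assuming the utaddress.c interfaces, not the exact ACPICA code; region_node stands in for the operation_region's namespace node):

	/* When an operation_region is created (at table load or during
	 * method execution), its address window is recorded:
	 */
	acpi_ut_add_address_range(space_id, address, length, region_node);

	/* Required counterpart when that region is deleted; this is the
	 * half that was missed for regions created inside control methods:
	 */
	acpi_ut_remove_address_range(space_id, region_node);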
A few months ago, support for adding addresses has been re-implemented. However, the removal of the address range list was missed, which caused some systems to crash due to the address list containing bogus namespace nodes from operation_regions declared in control methods. In order to fix the crash, this change removes dynamic operation_regions after control method termination. Link: https://github.com/acpica/acpica/commit/b2337200 Link: https://bugzilla.kernel.org/show_bug.cgi?id=202475 Fixes: 4abb951b73ff ("ACPICA: AML interpreter: add region addresses in global list during initialization") Reported-by: Michael J Gruber Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsobject.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c index 8638f43cfc3d8..79d86da1c8924 100644 --- a/drivers/acpi/acpica/nsobject.c +++ b/drivers/acpi/acpica/nsobject.c @@ -186,6 +186,10 @@ void acpi_ns_detach_object(struct acpi_namespace_node *node) } } + if (obj_desc->common.type == ACPI_TYPE_REGION) { + acpi_ut_remove_address_range(obj_desc->region.space_id, node); + } + /* Clear the Node entry in all cases */ node->object = NULL; -- GitLab From a3ce7a8e0dd9baa5932c480b789ab54afa3ab116 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 8 Apr 2019 13:42:23 -0700 Subject: [PATCH 0822/1861] ACPICA: Rename nameseg copy macro for clarity ACPICA commit 19c18d3157945d1b8b64a826f0a8e848b7dbb127 ACPI_MOVE_NAME changed to ACPI_COPY_NAMESEG This clarifies (1) this is a copy operation, and (2) it operates on ACPI name_segs. Improves understanding of the code. Link: https://github.com/acpica/acpica/commit/19c18d31 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/acpica/nsnames.c | 2 +- drivers/acpi/acpica/nsutils.c | 4 ++-- drivers/acpi/acpica/tbfind.c | 2 +- drivers/acpi/acpica/utstring.c | 2 +- include/acpi/actypes.h | 4 ++-- tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 4 ++-- tools/power/acpi/tools/acpidump/apfiles.c | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index 289c15bb8c6a9..c384320a0f269 100644 --- a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -108,7 +108,7 @@ acpi_ns_handle_to_name(acpi_handle target_handle, struct acpi_buffer *buffer) /* Just copy the ACPI name from the Node and zero terminate it */ node_name = acpi_ut_get_node_name(node); - ACPI_MOVE_NAME(buffer->pointer, node_name); + ACPI_COPY_NAMESEG(buffer->pointer, node_name); ((char *)buffer->pointer)[ACPI_NAME_SIZE] = 0; ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "%4.4s\n", (char *)buffer->pointer)); diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index e5cef1edf49fa..6f5940b02a4f0 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -489,8 +489,8 @@ acpi_ns_externalize_name(u32 internal_name_length, /* Copy and validate the 4-char name segment */ - ACPI_MOVE_NAME(&(*converted_name)[j], - &internal_name[names_index]); + ACPI_COPY_NAMESEG(&(*converted_name)[j], + &internal_name[names_index]); acpi_ut_repair_name(&(*converted_name)[j]); j += ACPI_NAME_SIZE; diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c index 951bd8e1c50a6..ddc88aaca3764 100644 --- a/drivers/acpi/acpica/tbfind.c +++ b/drivers/acpi/acpica/tbfind.c @@ -56,7 +56,7 @@ acpi_tb_find_table(char *signature, /* Normalize the input strings */ memset(&header, 0, sizeof(struct acpi_table_header)); - ACPI_MOVE_NAME(header.signature, signature); + ACPI_COPY_NAMESEG(header.signature, signature); strncpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); strncpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index 5bef0b0594062..89dc79798d385 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -145,7 +145,7 @@ void acpi_ut_repair_name(char *name) return; } - ACPI_MOVE_NAME(&original_name, name); + ACPI_COPY_NAMESEG(&original_name, name); /* Check each character in the name */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index f73382e82c26b..bfe54189f242e 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -516,10 +516,10 @@ typedef u64 acpi_integer; #ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED #define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) -#define ACPI_MOVE_NAME(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) +#define ACPI_COPY_NAMESEG(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else #define ACPI_COMPARE_NAME(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) -#define ACPI_MOVE_NAME(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) +#define ACPI_COPY_NAMESEG(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) #endif /* Support for the special RSDP signature (8 characters) */ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 2a1fd9182f94d..587b701a7edb6 100644 --- 
a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -286,7 +286,7 @@ static acpi_status osl_add_table_to_list(char *signature, u32 instance) return (AE_NO_MEMORY); } - ACPI_MOVE_NAME(new_info->signature, signature); + ACPI_COPY_NAMESEG(new_info->signature, signature); if (!gbl_table_list_head) { gbl_table_list_head = new_info; @@ -1174,7 +1174,7 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance) /* Extract signature */ - ACPI_MOVE_NAME(signature, filename); + ACPI_COPY_NAMESEG(signature, filename); return (AE_OK); } diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 49972bc78bc5e..8abc8f998abd7 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -110,9 +110,9 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) /* Construct lower-case filename from the table local signature */ if (ACPI_VALIDATE_RSDP_SIG(table->signature)) { - ACPI_MOVE_NAME(filename, ACPI_RSDP_NAME); + ACPI_COPY_NAMESEG(filename, ACPI_RSDP_NAME); } else { - ACPI_MOVE_NAME(filename, table->signature); + ACPI_COPY_NAMESEG(filename, table->signature); } filename[0] = (char)tolower((int)filename[0]); -- GitLab From 5599fb69355d7a558f32206dac7539e945a1f604 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 8 Apr 2019 13:42:24 -0700 Subject: [PATCH 0823/1861] ACPICA: Rename nameseg compare macro for clarity ACPICA commit 92ec0935f27e217dff0b176fca02c2ec3d782bb5 ACPI_COMPARE_NAME changed to ACPI_COMPARE_NAMESEG This clarifies (1) this is a compare on 4-byte namesegs, not a generic compare. Improves understanding of the code. Link: https://github.com/acpica/acpica/commit/92ec0935 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- arch/x86/boot/compressed/acpi.c | 2 +- drivers/acpi/acpica/dbexec.c | 2 +- drivers/acpi/acpica/dsinit.c | 2 +- drivers/acpi/acpica/nsinit.c | 4 +-- drivers/acpi/acpica/nsparse.c | 2 +- drivers/acpi/acpica/nsrepair.c | 2 +- drivers/acpi/acpica/nsrepair2.c | 2 +- drivers/acpi/acpica/nsxfname.c | 4 +-- drivers/acpi/acpica/rsxface.c | 8 ++--- drivers/acpi/acpica/tbdata.c | 3 +- drivers/acpi/acpica/tbinstal.c | 2 +- drivers/acpi/acpica/tbprint.c | 6 ++-- drivers/acpi/acpica/tbutils.c | 6 ++-- drivers/acpi/acpica/tbxface.c | 4 +-- drivers/acpi/acpica/tbxfload.c | 15 +++++----- drivers/acpi/acpica/utmisc.c | 8 ++--- drivers/acpi/acpica/utpredef.c | 4 +-- drivers/acpi/acpica/utstring.c | 2 +- drivers/acpi/scan.c | 2 +- drivers/acpi/sysfs.c | 8 ++--- drivers/acpi/tables.c | 4 +-- include/acpi/actypes.h | 4 +-- .../os_specific/service_layers/oslinuxtbl.c | 30 +++++++++---------- tools/power/acpi/tools/acpidump/apdump.c | 4 +-- 24 files changed, 66 insertions(+), 64 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 0ef4ad55b29b2..ad84239e595ea 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -276,7 +276,7 @@ static unsigned long get_acpi_srat_table(void) if (acpi_table) { header = (struct acpi_table_header *)acpi_table; - if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_SRAT)) + if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_SRAT)) return acpi_table; } entry += size; diff --git a/drivers/acpi/acpica/dbexec.c b/drivers/acpi/acpica/dbexec.c index bb43305cb215e..4027eaab18a4d 100644 --- a/drivers/acpi/acpica/dbexec.c +++ b/drivers/acpi/acpica/dbexec.c @@ -453,7 +453,7 @@ acpi_db_execute(char *name, char **args, acpi_object_type *types, u32 flags) /* Dump a _PLD buffer if present */ - if (ACPI_COMPARE_NAME + if (ACPI_COMPARE_NAMESEG ((ACPI_CAST_PTR (struct acpi_namespace_node, acpi_gbl_db_method_info.method)->name.ascii), diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index a4a24ffe5fae5..4ebd23700bbcb 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -200,7 +200,7 @@ acpi_ds_initialize_objects(u32 table_index, /* DSDT is always the first AML table */ - if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT)) { + if (ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_DSDT)) { ACPI_DEBUG_PRINT_RAW((ACPI_DB_INIT, "\nInitializing Namespace objects:\n")); } diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 19fb8dda870f7..53e5d00d3a5ed 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -478,7 +478,7 @@ acpi_ns_find_ini_methods(acpi_handle obj_handle, /* We are only looking for methods named _INI */ - if (!ACPI_COMPARE_NAME(node->name.ascii, METHOD_NAME__INI)) { + if (!ACPI_COMPARE_NAMESEG(node->name.ascii, METHOD_NAME__INI)) { return (AE_OK); } @@ -641,7 +641,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, * Note: We know there is an _INI within this subtree, but it may not be * under this particular device, it may be lower in the branch. 
*/ - if (!ACPI_COMPARE_NAME(device_node->name.ascii, "_SB_") || + if (!ACPI_COMPARE_NAMESEG(device_node->name.ascii, "_SB_") || device_node->parent != acpi_gbl_root_node) { ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname (ACPI_TYPE_METHOD, device_node, diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index c0b4f7bedfaba..f16cf5e4742cc 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -203,7 +203,7 @@ acpi_ns_one_complete_parse(u32 pass_number, /* Found OSDT table, enable the namespace override feature */ - if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT) && + if (ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_OSDT) && pass_number == ACPI_IMODE_LOAD_PASS1) { walk_state->namespace_override = TRUE; } diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c index 0aacfa48e20d1..be86fea8e4d48 100644 --- a/drivers/acpi/acpica/nsrepair.c +++ b/drivers/acpi/acpica/nsrepair.c @@ -316,7 +316,7 @@ static const struct acpi_simple_repair_info *acpi_ns_match_simple_repair(struct this_name = acpi_object_repair_info; while (this_name->object_converter) { - if (ACPI_COMPARE_NAME(node->name.ascii, this_name->name)) { + if (ACPI_COMPARE_NAMESEG(node->name.ascii, this_name->name)) { /* Check if we can actually repair this name/type combination */ diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index d5804a6d1d657..4c285e9184654 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -188,7 +188,7 @@ static const struct acpi_repair_info *acpi_ns_match_complex_repair(struct this_name = acpi_ns_repairable_names; while (this_name->repair_function) { - if (ACPI_COMPARE_NAME(node->name.ascii, this_name->name)) { + if (ACPI_COMPARE_NAMESEG(node->name.ascii, this_name->name)) { return (this_name); } diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index de2d3135d6a93..55b4a5b3331f2 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -495,8 +495,8 @@ acpi_status acpi_install_method(u8 *buffer) /* Table must be a DSDT or SSDT */ - if (!ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) && - !ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) { + if (!ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_DSDT) && + !ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_SSDT)) { return (AE_BAD_HEADER); } diff --git a/drivers/acpi/acpica/rsxface.c b/drivers/acpi/acpica/rsxface.c index 1d6f136e40683..c62be3d917129 100644 --- a/drivers/acpi/acpica/rsxface.c +++ b/drivers/acpi/acpica/rsxface.c @@ -603,10 +603,10 @@ acpi_walk_resources(acpi_handle device_handle, /* Parameter validation */ if (!device_handle || !user_function || !name || - (!ACPI_COMPARE_NAME(name, METHOD_NAME__CRS) && - !ACPI_COMPARE_NAME(name, METHOD_NAME__PRS) && - !ACPI_COMPARE_NAME(name, METHOD_NAME__AEI) && - !ACPI_COMPARE_NAME(name, METHOD_NAME__DMA))) { + (!ACPI_COMPARE_NAMESEG(name, METHOD_NAME__CRS) && + !ACPI_COMPARE_NAMESEG(name, METHOD_NAME__PRS) && + !ACPI_COMPARE_NAMESEG(name, METHOD_NAME__AEI) && + !ACPI_COMPARE_NAMESEG(name, METHOD_NAME__DMA))) { return_ACPI_STATUS(AE_BAD_PARAMETER); } diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c index 0cecd0039acff..933f81316ad2c 100644 --- a/drivers/acpi/acpica/tbdata.c +++ b/drivers/acpi/acpica/tbdata.c @@ -480,7 +480,8 @@ acpi_tb_verify_temp_table(struct acpi_table_desc *table_desc, /* If a particular signature is expected (DSDT/FACS), it must match */ - if (signature && 
!ACPI_COMPARE_NAME(&table_desc->signature, signature)) { + if (signature && + !ACPI_COMPARE_NAMESEG(&table_desc->signature, signature)) { ACPI_BIOS_ERROR((AE_INFO, "Invalid signature 0x%X for ACPI table, expected [%s]", table_desc->signature.integer, signature)); diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index be6642bf6366e..ef1ffd36ab3ff 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -120,7 +120,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, */ if (!reload && acpi_gbl_disable_ssdt_table_install && - ACPI_COMPARE_NAME(&new_table_desc.signature, ACPI_SIG_SSDT)) { + ACPI_COMPARE_NAMESEG(&new_table_desc.signature, ACPI_SIG_SSDT)) { ACPI_INFO(("Ignoring installation of %4.4s at %8.8X%8.8X", new_table_desc.signature.ascii, ACPI_FORMAT_UINT64(address))); diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 9b5df95d881b2..0bc18fd5cd717 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -94,7 +94,7 @@ acpi_tb_print_table_header(acpi_physical_address address, { struct acpi_table_header local_header; - if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_FACS)) { + if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_FACS)) { /* FACS only has signature and length fields */ @@ -158,8 +158,8 @@ acpi_status acpi_tb_verify_checksum(struct acpi_table_header *table, u32 length) * They are the odd tables, have no standard ACPI header and no checksum */ - if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_S3PT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_FACS)) { + if (ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_S3PT) || + ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_FACS)) { return (AE_OK); } diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 2469e01310e25..c5f0b8ec70cc4 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -332,9 +332,9 @@ acpi_tb_parse_root_table(acpi_physical_address rsdp_address) &table_index); if (ACPI_SUCCESS(status) && - ACPI_COMPARE_NAME(&acpi_gbl_root_table_list. - tables[table_index].signature, - ACPI_SIG_FADT)) { + ACPI_COMPARE_NAMESEG(&acpi_gbl_root_table_list. 
+ tables[table_index].signature, + ACPI_SIG_FADT)) { acpi_gbl_fadt_index = table_index; acpi_tb_parse_fadt(); } diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index 36592888f0e74..1640685bf4ae8 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -230,7 +230,7 @@ acpi_get_table_header(char *signature, for (i = 0, j = 0; i < acpi_gbl_root_table_list.current_table_count; i++) { - if (!ACPI_COMPARE_NAME + if (!ACPI_COMPARE_NAMESEG (&(acpi_gbl_root_table_list.tables[i].signature), signature)) { continue; @@ -323,7 +323,7 @@ acpi_get_table(char *signature, i++) { table_desc = &acpi_gbl_root_table_list.tables[i]; - if (!ACPI_COMPARE_NAME(&table_desc->signature, signature)) { + if (!ACPI_COMPARE_NAMESEG(&table_desc->signature, signature)) { continue; } diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index 1a2592cc3245d..4f30f06a6f787 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -118,7 +118,7 @@ acpi_status acpi_tb_load_namespace(void) table = &acpi_gbl_root_table_list.tables[acpi_gbl_dsdt_index]; if (!acpi_gbl_root_table_list.current_table_count || - !ACPI_COMPARE_NAME(table->signature.ascii, ACPI_SIG_DSDT) || + !ACPI_COMPARE_NAMESEG(table->signature.ascii, ACPI_SIG_DSDT) || ACPI_FAILURE(acpi_tb_validate_table(table))) { status = AE_NO_ACPI_TABLES; goto unlock_and_exit; @@ -170,11 +170,12 @@ acpi_status acpi_tb_load_namespace(void) table = &acpi_gbl_root_table_list.tables[i]; if (!table->address || - (!ACPI_COMPARE_NAME(table->signature.ascii, ACPI_SIG_SSDT) - && !ACPI_COMPARE_NAME(table->signature.ascii, - ACPI_SIG_PSDT) - && !ACPI_COMPARE_NAME(table->signature.ascii, - ACPI_SIG_OSDT)) + (!ACPI_COMPARE_NAMESEG + (table->signature.ascii, ACPI_SIG_SSDT) + && !ACPI_COMPARE_NAMESEG(table->signature.ascii, + ACPI_SIG_PSDT) + && !ACPI_COMPARE_NAMESEG(table->signature.ascii, + ACPI_SIG_OSDT)) || ACPI_FAILURE(acpi_tb_validate_table(table))) { continue; } @@ -364,7 +365,7 @@ acpi_status acpi_unload_parent_table(acpi_handle object) * only these types can contain AML and thus are the only types * that can create namespace objects. 
*/ - if (ACPI_COMPARE_NAME + if (ACPI_COMPARE_NAMESEG (acpi_gbl_root_table_list.tables[i].signature.ascii, ACPI_SIG_DSDT)) { status = AE_TYPE; diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index afaadc73196b9..8638efacdbf44 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -59,10 +59,10 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table) /* These are the only tables that contain executable AML */ - if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_PSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT) || + if (ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_DSDT) || + ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_PSDT) || + ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_SSDT) || + ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_OSDT) || ACPI_IS_OEM_SIG(table->signature)) { return (TRUE); } diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c index a9f08f43c6851..1b0f68f5ed8c5 100644 --- a/drivers/acpi/acpica/utpredef.c +++ b/drivers/acpi/acpica/utpredef.c @@ -84,7 +84,7 @@ const union acpi_predefined_info *acpi_ut_match_predefined_method(char *name) this_name = acpi_gbl_predefined_methods; while (this_name->info.name[0]) { - if (ACPI_COMPARE_NAME(name, this_name->info.name)) { + if (ACPI_COMPARE_NAMESEG(name, this_name->info.name)) { return (this_name); } @@ -201,7 +201,7 @@ const union acpi_predefined_info *acpi_ut_match_resource_name(char *name) this_name = acpi_gbl_resource_names; while (this_name->info.name[0]) { - if (ACPI_COMPARE_NAME(name, this_name->info.name)) { + if (ACPI_COMPARE_NAMESEG(name, this_name->info.name)) { return (this_name); } diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index 89dc79798d385..927797355da9d 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -141,7 +141,7 @@ void acpi_ut_repair_name(char *name) * Special case for the root node. This can happen if we get an * error during the execution of module-level code. 
*/ - if (ACPI_COMPARE_NAME(name, ACPI_ROOT_PATHNAME)) { + if (ACPI_COMPARE_NAMESEG(name, ACPI_ROOT_PATHNAME)) { return; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 446c959a8f082..3fb331fb6e821 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2260,7 +2260,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr) mutex_lock(&acpi_probe_mutex); for (ape = ap_head; nr; ape++, nr--) { - if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) { + if (ACPI_COMPARE_NAMESEG(ACPI_SIG_MADT, ape->id)) { acpi_probe_count = 0; acpi_table_parse_madt(ape->type, acpi_match_madt, 0); count += acpi_probe_count; diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index fa76f5e41b5ca..262d6ee4735dc 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -368,10 +368,10 @@ static int acpi_table_attr_init(struct kobject *tables_obj, char instance_str[ACPI_INST_SIZE]; sysfs_attr_init(&table_attr->attr.attr); - ACPI_MOVE_NAME(table_attr->name, table_header->signature); + ACPI_COPY_NAMESEG(table_attr->name, table_header->signature); list_for_each_entry(attr, &acpi_table_attr_list, node) { - if (ACPI_COMPARE_NAME(table_attr->name, attr->name)) + if (ACPI_COMPARE_NAMESEG(table_attr->name, attr->name)) if (table_attr->instance < attr->instance) table_attr->instance = attr->instance; } @@ -382,7 +382,7 @@ static int acpi_table_attr_init(struct kobject *tables_obj, return -ERANGE; } - ACPI_MOVE_NAME(table_attr->filename, table_header->signature); + ACPI_COPY_NAMESEG(table_attr->filename, table_header->signature); table_attr->filename[ACPI_NAME_SIZE] = '\0'; if (table_attr->instance > 1 || (table_attr->instance == 1 && !acpi_get_table @@ -484,7 +484,7 @@ static int acpi_table_data_init(struct acpi_table_header *th) int i; for (i = 0; i < NUM_ACPI_DATA_OBJS; i++) { - if (ACPI_COMPARE_NAME(th->signature, acpi_data_objs[i].name)) { + if (ACPI_COMPARE_NAMESEG(th->signature, acpi_data_objs[i].name)) { data_attr = kzalloc(sizeof(*data_attr), GFP_KERNEL); if (!data_attr) return -ENOMEM; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 8fccbe49612a0..1ffd7b9eefc5a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -670,8 +670,8 @@ static void __init acpi_table_initrd_scan(void) table_length = table->length; /* Skip RSDT/XSDT which should only be used for override */ - if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) { + if (ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_RSDT) || + ACPI_COMPARE_NAMESEG(table->signature, ACPI_SIG_XSDT)) { acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); goto next_table; } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index bfe54189f242e..cb51172a47a36 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -515,10 +515,10 @@ typedef u64 acpi_integer; /* Optimizations for 4-character (32-bit) acpi_name manipulation */ #ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED -#define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) +#define ACPI_COMPARE_NAMESEG(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) #define ACPI_COPY_NAMESEG(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else -#define ACPI_COMPARE_NAME(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) +#define ACPI_COMPARE_NAMESEG(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) #define ACPI_COPY_NAMESEG(dest,src) (strncpy 
(ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) #endif diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 587b701a7edb6..2686d858225ad 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -293,7 +293,7 @@ static acpi_status osl_add_table_to_list(char *signature, u32 instance) } else { next = gbl_table_list_head; while (1) { - if (ACPI_COMPARE_NAME(next->signature, signature)) { + if (ACPI_COMPARE_NAMESEG(next->signature, signature)) { if (next->instance == instance) { found = TRUE; } @@ -782,11 +782,11 @@ osl_get_bios_table(char *signature, /* Handle special tables whose addresses are not in RSDT/XSDT */ - if (ACPI_COMPARE_NAME(signature, ACPI_RSDP_NAME) || - ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT) || - ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT) || - ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT) || - ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) { + if (ACPI_COMPARE_NAMESEG(signature, ACPI_RSDP_NAME) || + ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_RSDT) || + ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_XSDT) || + ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_DSDT) || + ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_FACS)) { find_next_instance: @@ -797,7 +797,7 @@ find_next_instance: * careful about the FADT length and validate table addresses. * Note: The 64-bit addresses have priority. */ - if (ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT)) { + if (ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_DSDT)) { if (current_instance < 2) { if ((gbl_fadt->header.length >= MIN_FADT_FOR_XDSDT) && gbl_fadt->Xdsdt @@ -815,7 +815,7 @@ find_next_instance: dsdt; } } - } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) { + } else if (ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_FACS)) { if (current_instance < 2) { if ((gbl_fadt->header.length >= MIN_FADT_FOR_XFACS) && gbl_fadt->Xfacs @@ -833,7 +833,7 @@ find_next_instance: facs; } } - } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) { + } else if (ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_XSDT)) { if (!gbl_revision) { return (AE_BAD_SIGNATURE); } @@ -842,7 +842,7 @@ find_next_instance: (acpi_physical_address)gbl_rsdp. xsdt_physical_address; } - } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) { + } else if (ACPI_COMPARE_NAMESEG(signature, ACPI_SIG_RSDT)) { if (current_instance == 0) { table_address = (acpi_physical_address)gbl_rsdp. @@ -931,7 +931,7 @@ find_next_instance: /* Does this table match the requested signature? 
*/ - if (!ACPI_COMPARE_NAME + if (!ACPI_COMPARE_NAMESEG (mapped_table->signature, signature)) { osl_unmap_table(mapped_table); mapped_table = NULL; @@ -1086,8 +1086,8 @@ osl_map_table(acpi_size address, return (AE_BAD_SIGNATURE); } } else - if (!ACPI_COMPARE_NAME(signature, mapped_table->signature)) - { + if (!ACPI_COMPARE_NAMESEG + (signature, mapped_table->signature)) { acpi_os_unmap_memory(mapped_table, sizeof(struct acpi_table_header)); return (AE_BAD_SIGNATURE); @@ -1236,7 +1236,7 @@ osl_read_table_from_file(char *filename, status = AE_BAD_SIGNATURE; goto exit; } - } else if (!ACPI_COMPARE_NAME(signature, header.signature)) { + } else if (!ACPI_COMPARE_NAMESEG(signature, header.signature)) { fprintf(stderr, "Incorrect signature: Expecting %4.4s, found %4.4s\n", signature, header.signature); @@ -1329,7 +1329,7 @@ osl_get_customized_table(char *pathname, /* Ignore meaningless files */ - if (!ACPI_COMPARE_NAME(filename, signature)) { + if (!ACPI_COMPARE_NAMESEG(filename, signature)) { continue; } diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index e256c2ac5ddc8..9dfcc0329d34d 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -310,9 +310,9 @@ int ap_dump_table_by_name(char *signature) /* To be friendly, handle tables whose signatures do not match the name */ - if (ACPI_COMPARE_NAME(local_signature, "FADT")) { + if (ACPI_COMPARE_NAMESEG(local_signature, "FADT")) { strcpy(local_signature, ACPI_SIG_FADT); - } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) { + } else if (ACPI_COMPARE_NAMESEG(local_signature, "MADT")) { strcpy(local_signature, ACPI_SIG_MADT); } -- GitLab From 0c24613cda163dedfa229afc8eff6072e57fac8d Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:04 +0800 Subject: [PATCH 0824/1861] drm/mediatek: fix the rate and divider of hdmi phy for MT2701 Due to a clerical error, there is one zero missing in 12800000. Fix it to 128000000 and use the matching divider. Fixes: 0fc721b2968e ("drm/mediatek: add hdmi driver for MT2701 and MT7623") Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index fcc42dc6ea7fb..0746fc8877069 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -116,8 +116,8 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, if (rate <= 64000000) pos_div = 3; - else if (rate <= 12800000) - pos_div = 1; + else if (rate <= 128000000) + pos_div = 2; else pos_div = 1; -- GitLab From 3278675567dfb901d831d46849c386a4f932905e Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 8 Apr 2019 13:42:25 -0700 Subject: [PATCH 0825/1861] ACPICA: Rename nameseg length macro/define for clarity ACPICA commit 24870bd9e73d71e2a1ff0a1e94519f8f8409e57d ACPI_NAME_SIZE changed to ACPI_NAMESEG_SIZE This clarifies that this is the length of an individual nameseg, not the length of a generic namestring/namepath. Improves understanding of the code. Link: https://github.com/acpica/acpica/commit/24870bd9 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/acpi_configfs.c | 4 ++-- drivers/acpi/acpica/aclocal.h | 4 ++-- drivers/acpi/acpica/dbnames.c | 2 +- drivers/acpi/acpica/evgpeinit.c | 4 ++-- drivers/acpi/acpica/exnames.c | 6 +++--- drivers/acpi/acpica/nsaccess.c | 2 +- drivers/acpi/acpica/nsdump.c | 2 +- drivers/acpi/acpica/nsnames.c | 6 +++--- drivers/acpi/acpica/nsrepair2.c | 2 +- drivers/acpi/acpica/nsutils.c | 10 +++++----- drivers/acpi/acpica/psargs.c | 8 ++++---- drivers/acpi/acpica/tbfind.c | 18 +++++++++--------- drivers/acpi/acpica/tbprint.c | 4 ++-- drivers/acpi/acpica/utascii.c | 2 +- drivers/acpi/acpica/utdecode.c | 2 +- drivers/acpi/acpica/utstring.c | 2 +- drivers/acpi/sysfs.c | 6 +++--- drivers/firmware/iscsi_ibft.c | 2 +- .../intel/int340x_thermal/acpi_thermal_rel.c | 2 +- include/acpi/actbl.h | 4 ++-- include/acpi/actypes.h | 8 ++++---- .../os_specific/service_layers/oslinuxtbl.c | 14 +++++++------- tools/power/acpi/tools/acpidump/apdump.c | 4 ++-- tools/power/acpi/tools/acpidump/apfiles.c | 4 ++-- 24 files changed, 61 insertions(+), 61 deletions(-) diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c index 81bfc61972931..f92033661239b 100644 --- a/drivers/acpi/acpi_configfs.c +++ b/drivers/acpi/acpi_configfs.c @@ -109,7 +109,7 @@ static ssize_t acpi_table_signature_show(struct config_item *cfg, char *str) if (!h) return -EINVAL; - return sprintf(str, "%.*s\n", ACPI_NAME_SIZE, h->signature); + return sprintf(str, "%.*s\n", ACPI_NAMESEG_SIZE, h->signature); } static ssize_t acpi_table_length_show(struct config_item *cfg, char *str) @@ -170,7 +170,7 @@ static ssize_t acpi_table_asl_compiler_id_show(struct config_item *cfg, if (!h) return -EINVAL; - return sprintf(str, "%.*s\n", ACPI_NAME_SIZE, h->asl_compiler_id); + return sprintf(str, "%.*s\n", ACPI_NAMESEG_SIZE, h->asl_compiler_id); } static ssize_t acpi_table_asl_compiler_revision_show(struct config_item *cfg, diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index a2dfbf6b004e2..13d513b815892 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state); * expected_return_btypes - Allowed type(s) for the return value */ struct acpi_name_info { - char name[ACPI_NAME_SIZE]; + char name[ACPI_NAMESEG_SIZE]; u16 argument_list; u8 expected_btypes; }; @@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node * converted_object); struct acpi_simple_repair_info { - char name[ACPI_NAME_SIZE]; + char name[ACPI_NAMESEG_SIZE]; u32 unexpected_btypes; u32 package_index; acpi_object_converter object_converter; diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c index 004d34d9369bb..63fe30e868075 100644 --- a/drivers/acpi/acpica/dbnames.c +++ b/drivers/acpi/acpica/dbnames.c @@ -354,7 +354,7 @@ acpi_status acpi_db_find_name_in_namespace(char *name_arg) char acpi_name[5] = "____"; char *acpi_name_ptr = acpi_name; - if (strlen(name_arg) > ACPI_NAME_SIZE) { + if (strlen(name_arg) > ACPI_NAMESEG_SIZE) { acpi_os_printf("Name must be no longer than 4 characters\n"); return (AE_OK); } diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c index c92d2f6ebe014..b04f982e59fa8 100644 --- a/drivers/acpi/acpica/evgpeinit.c +++ b/drivers/acpi/acpica/evgpeinit.c @@ -292,7 +292,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle, acpi_status status; u32 gpe_number; u8 temp_gpe_number; - char name[ACPI_NAME_SIZE + 1]; + char 
name[ACPI_NAMESEG_SIZE + 1]; u8 type; ACPI_FUNCTION_TRACE(ev_match_gpe_method); @@ -310,7 +310,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle, * 1) Extract the method name and null terminate it */ ACPI_MOVE_32_TO_32(name, &method_node->name.integer); - name[ACPI_NAME_SIZE] = 0; + name[ACPI_NAMESEG_SIZE] = 0; /* 2) Name must begin with an underscore */ diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c index bd68d66e89f0d..6b76be5212a47 100644 --- a/drivers/acpi/acpica/exnames.c +++ b/drivers/acpi/acpica/exnames.c @@ -53,10 +53,10 @@ static char *acpi_ex_allocate_name_string(u32 prefix_count, u32 num_name_segs) /* Special case for root */ - size_needed = 1 + (ACPI_NAME_SIZE * num_name_segs) + 2 + 1; + size_needed = 1 + (ACPI_NAMESEG_SIZE * num_name_segs) + 2 + 1; } else { size_needed = - prefix_count + (ACPI_NAME_SIZE * num_name_segs) + 2 + 1; + prefix_count + (ACPI_NAMESEG_SIZE * num_name_segs) + 2 + 1; } /* @@ -141,7 +141,7 @@ static acpi_status acpi_ex_name_segment(u8 ** in_aml_address, char *name_string) } for (index = 0; - (index < ACPI_NAME_SIZE) + (index < ACPI_NAMESEG_SIZE) && (acpi_ut_valid_name_char(*aml_address, 0)); index++) { char_buf[index] = *aml_address++; } diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 75192b958544e..7b855603f81a5 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -683,7 +683,7 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, /* Point to next name segment and make this node current */ - path += ACPI_NAME_SIZE; + path += ACPI_NAMESEG_SIZE; current_node = this_node; } diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index 15070bd0c28a9..1b12c172e1151 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -70,7 +70,7 @@ void acpi_ns_print_pathname(u32 num_segments, const char *pathname) acpi_os_printf("?"); } - pathname += ACPI_NAME_SIZE; + pathname += ACPI_NAMESEG_SIZE; num_segments--; if (num_segments) { acpi_os_printf("."); diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index c384320a0f269..370bbc8677453 100644 --- a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -109,7 +109,7 @@ acpi_ns_handle_to_name(acpi_handle target_handle, struct acpi_buffer *buffer) node_name = acpi_ut_get_node_name(node); ACPI_COPY_NAMESEG(buffer->pointer, node_name); - ((char *)buffer->pointer)[ACPI_NAME_SIZE] = 0; + ((char *)buffer->pointer)[ACPI_NAMESEG_SIZE] = 0; ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "%4.4s\n", (char *)buffer->pointer)); return_ACPI_STATUS(AE_OK); @@ -198,7 +198,7 @@ acpi_ns_build_normalized_path(struct acpi_namespace_node *node, char *full_path, u32 path_size, u8 no_trailing) { u32 length = 0, i; - char name[ACPI_NAME_SIZE]; + char name[ACPI_NAMESEG_SIZE]; u8 do_no_trailing; char c, *left, *right; struct acpi_namespace_node *next_node; @@ -446,7 +446,7 @@ static void acpi_ns_normalize_pathname(char *original_path) /* Do one nameseg at a time */ - for (i = 0; (i < ACPI_NAME_SIZE) && *input_path; i++) { + for (i = 0; (i < ACPI_NAMESEG_SIZE) && *input_path; i++) { if ((i == 0) || (*input_path != '_')) { /* First char is allowed to be underscore */ *new_path = *input_path; new_path++; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 4c285e9184654..8d776256b2131 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info, 
return_object_ptr); typedef struct acpi_repair_info { - char name[ACPI_NAME_SIZE]; + char name[ACPI_NAMESEG_SIZE]; acpi_repair_function repair_function; } acpi_repair_info; diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 6f5940b02a4f0..6bc90d46db5ca 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -178,7 +178,7 @@ void acpi_ns_get_internal_name_length(struct acpi_namestring_info *info) } } - info->length = (ACPI_NAME_SIZE * info->num_segments) + + info->length = (ACPI_NAMESEG_SIZE * info->num_segments) + 4 + info->num_carats; info->next_external_char = next_external_char; @@ -249,7 +249,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) /* Build the name (minus path separators) */ for (; num_segments; num_segments--) { - for (i = 0; i < ACPI_NAME_SIZE; i++) { + for (i = 0; i < ACPI_NAMESEG_SIZE; i++) { if (ACPI_IS_PATH_SEPARATOR(*external_name) || (*external_name == 0)) { @@ -274,7 +274,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) /* Move on the next segment */ external_name++; - result += ACPI_NAME_SIZE; + result += ACPI_NAMESEG_SIZE; } /* Terminate the string */ @@ -493,8 +493,8 @@ acpi_ns_externalize_name(u32 internal_name_length, &internal_name[names_index]); acpi_ut_repair_name(&(*converted_name)[j]); - j += ACPI_NAME_SIZE; - names_index += ACPI_NAME_SIZE; + j += ACPI_NAMESEG_SIZE; + names_index += ACPI_NAMESEG_SIZE; } } diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c index 9d9d442cd9997..e62c7897fdf18 100644 --- a/drivers/acpi/acpica/psargs.c +++ b/drivers/acpi/acpica/psargs.c @@ -150,21 +150,21 @@ char *acpi_ps_get_next_namestring(struct acpi_parse_state *parser_state) /* Two name segments */ - end += 1 + (2 * ACPI_NAME_SIZE); + end += 1 + (2 * ACPI_NAMESEG_SIZE); break; case AML_MULTI_NAME_PREFIX: /* Multiple name segments, 4 chars each, count in next byte */ - end += 2 + (*(end + 1) * ACPI_NAME_SIZE); + end += 2 + (*(end + 1) * ACPI_NAMESEG_SIZE); break; default: /* Single name segment */ - end += ACPI_NAME_SIZE; + end += ACPI_NAMESEG_SIZE; break; } @@ -522,7 +522,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state ACPI_MOVE_32_TO_32(&name, parser_state->aml); acpi_ps_set_name(field, name); - parser_state->aml += ACPI_NAME_SIZE; + parser_state->aml += ACPI_NAMESEG_SIZE; ASL_CV_CAPTURE_COMMENTS_ONLY(parser_state); diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c index ddc88aaca3764..b2abb40023a65 100644 --- a/drivers/acpi/acpica/tbfind.c +++ b/drivers/acpi/acpica/tbfind.c @@ -65,7 +65,7 @@ acpi_tb_find_table(char *signature, (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature), - header.signature, ACPI_NAME_SIZE)) { + header.signature, ACPI_NAMESEG_SIZE)) { /* Not the requested table */ @@ -94,14 +94,14 @@ acpi_tb_find_table(char *signature, if (!memcmp (acpi_gbl_root_table_list.tables[i].pointer->signature, - header.signature, ACPI_NAME_SIZE) && (!oem_id[0] - || - !memcmp - (acpi_gbl_root_table_list. - tables[i].pointer-> - oem_id, - header.oem_id, - ACPI_OEM_ID_SIZE)) + header.signature, ACPI_NAMESEG_SIZE) && (!oem_id[0] + || + !memcmp + (acpi_gbl_root_table_list. + tables[i]. 
+ pointer->oem_id, + header.oem_id, + ACPI_OEM_ID_SIZE)) && (!oem_table_id[0] || !memcmp(acpi_gbl_root_table_list.tables[i].pointer-> oem_table_id, header.oem_table_id, diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 0bc18fd5cd717..4764f849cb782 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -69,10 +69,10 @@ acpi_tb_cleanup_table_header(struct acpi_table_header *out_header, memcpy(out_header, header, sizeof(struct acpi_table_header)); - acpi_tb_fix_string(out_header->signature, ACPI_NAME_SIZE); + acpi_tb_fix_string(out_header->signature, ACPI_NAMESEG_SIZE); acpi_tb_fix_string(out_header->oem_id, ACPI_OEM_ID_SIZE); acpi_tb_fix_string(out_header->oem_table_id, ACPI_OEM_TABLE_ID_SIZE); - acpi_tb_fix_string(out_header->asl_compiler_id, ACPI_NAME_SIZE); + acpi_tb_fix_string(out_header->asl_compiler_id, ACPI_NAMESEG_SIZE); } /******************************************************************************* diff --git a/drivers/acpi/acpica/utascii.c b/drivers/acpi/acpica/utascii.c index 79d7426fd7bfb..f6cd7d4f698b3 100644 --- a/drivers/acpi/acpica/utascii.c +++ b/drivers/acpi/acpica/utascii.c @@ -30,7 +30,7 @@ u8 acpi_ut_valid_nameseg(char *name) /* Validate each character in the signature */ - for (i = 0; i < ACPI_NAME_SIZE; i++) { + for (i = 0; i < ACPI_NAMESEG_SIZE; i++) { if (!acpi_ut_valid_name_char(name[i], i)) { return (FALSE); } diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index ad9f77eb554ff..76e054c83c396 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -239,7 +239,7 @@ const char *acpi_ut_get_node_name(void *object) { struct acpi_namespace_node *node = (struct acpi_namespace_node *)object; - /* Must return a string of exactly 4 characters == ACPI_NAME_SIZE */ + /* Must return a string of exactly 4 characters == ACPI_NAMESEG_SIZE */ if (!object) { return ("NULL"); diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index 927797355da9d..c39b5483045df 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -149,7 +149,7 @@ void acpi_ut_repair_name(char *name) /* Check each character in the name */ - for (i = 0; i < ACPI_NAME_SIZE; i++) { + for (i = 0; i < ACPI_NAMESEG_SIZE; i++) { if (acpi_ut_valid_name_char(name[i], i)) { continue; } diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 262d6ee4735dc..75948a3f1a20e 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -327,9 +327,9 @@ static struct kobject *hotplug_kobj; struct acpi_table_attr { struct bin_attribute attr; - char name[ACPI_NAME_SIZE]; + char name[ACPI_NAMESEG_SIZE]; int instance; - char filename[ACPI_NAME_SIZE+ACPI_INST_SIZE]; + char filename[ACPI_NAMESEG_SIZE+ACPI_INST_SIZE]; struct list_head node; }; @@ -383,7 +383,7 @@ static int acpi_table_attr_init(struct kobject *tables_obj, } ACPI_COPY_NAMESEG(table_attr->filename, table_header->signature); - table_attr->filename[ACPI_NAME_SIZE] = '\0'; + table_attr->filename[ACPI_NAMESEG_SIZE] = '\0'; if (table_attr->instance > 1 || (table_attr->instance == 1 && !acpi_get_table (table_header->signature, 2, &header))) { diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index c51462f5aa1e4..a5dc0629f2259 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -425,7 +425,7 @@ static ssize_t ibft_attr_show_acpitbl(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_ACPITBL_SIGNATURE: - str += sprintf_string(str, 
ACPI_NAME_SIZE, + str += sprintf_string(str, ACPI_NAMESEG_SIZE, entry->header->header.signature); break; case ISCSI_BOOT_ACPITBL_OEM_ID: diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c index 45e7e5cbdffb4..7c71ffb733a19 100644 --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c @@ -230,7 +230,7 @@ static void get_single_name(acpi_handle handle, char *name) if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer))) pr_warn("Failed to get device name from acpi handle\n"); else { - memcpy(name, buffer.pointer, ACPI_NAME_SIZE); + memcpy(name, buffer.pointer, ACPI_NAMESEG_SIZE); kfree(buffer.pointer); } } diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 65cc9cbf11415..d568128025df8 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,14 +66,14 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAME_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ char oem_id[ACPI_OEM_ID_SIZE]; /* ASCII OEM identification */ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; /* ASCII OEM table identification */ u32 oem_revision; /* OEM revision number */ - char asl_compiler_id[ACPI_NAME_SIZE]; /* ASCII ASL compiler vendor ID */ + char asl_compiler_id[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */ u32 asl_compiler_revision; /* ASL compiler version */ }; diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index cb51172a47a36..ad6892a24015e 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -375,7 +375,7 @@ typedef u64 acpi_physical_address; /* Names within the namespace are 4 bytes long */ -#define ACPI_NAME_SIZE 4 +#define ACPI_NAMESEG_SIZE 4 /* Fixed by ACPI spec */ #define ACPI_PATH_SEGMENT_LENGTH 5 /* 4 chars for name + 1 char for separator */ #define ACPI_PATH_SEPARATOR '.' @@ -518,8 +518,8 @@ typedef u64 acpi_integer; #define ACPI_COMPARE_NAMESEG(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) #define ACPI_COPY_NAMESEG(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else -#define ACPI_COMPARE_NAMESEG(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) -#define ACPI_COPY_NAMESEG(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) +#define ACPI_COMPARE_NAMESEG(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAMESEG_SIZE)) +#define ACPI_COPY_NAMESEG(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAMESEG_SIZE)) #endif /* Support for the special RSDP signature (8 characters) */ @@ -529,7 +529,7 @@ typedef u64 acpi_integer; /* Support for OEMx signature (x can be any character) */ #define ACPI_IS_OEM_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_OEM_NAME, 3) &&\ - strnlen (a, ACPI_NAME_SIZE) == ACPI_NAME_SIZE) + strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE) /* * Algorithm to obtain access bit width. 
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 2686d858225ad..d1f3d44e315eb 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl") typedef struct osl_table_info { struct osl_table_info *next; u32 instance; - char signature[ACPI_NAME_SIZE]; + char signature[ACPI_NAMESEG_SIZE]; } osl_table_info; @@ -995,7 +995,7 @@ static acpi_status osl_list_customized_tables(char *directory) { void *table_dir; u32 instance; - char temp_name[ACPI_NAME_SIZE]; + char temp_name[ACPI_NAMESEG_SIZE]; char *filename; acpi_status status = AE_OK; @@ -1158,15 +1158,15 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance) /* Ignore meaningless files */ - if (strlen(filename) < ACPI_NAME_SIZE) { + if (strlen(filename) < ACPI_NAMESEG_SIZE) { return (AE_BAD_SIGNATURE); } /* Extract instance number */ - if (isdigit((int)filename[ACPI_NAME_SIZE])) { - sscanf(&filename[ACPI_NAME_SIZE], "%u", instance); - } else if (strlen(filename) != ACPI_NAME_SIZE) { + if (isdigit((int)filename[ACPI_NAMESEG_SIZE])) { + sscanf(&filename[ACPI_NAMESEG_SIZE], "%u", instance); + } else if (strlen(filename) != ACPI_NAMESEG_SIZE) { return (AE_BAD_SIGNATURE); } else { *instance = 0; @@ -1311,7 +1311,7 @@ osl_get_customized_table(char *pathname, { void *table_dir; u32 current_instance = 0; - char temp_name[ACPI_NAME_SIZE]; + char temp_name[ACPI_NAMESEG_SIZE]; char table_filename[PATH_MAX]; char *filename; acpi_status status; diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 9dfcc0329d34d..820baeb5092bd 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -289,14 +289,14 @@ int ap_dump_table_by_address(char *ascii_address) int ap_dump_table_by_name(char *signature) { - char local_signature[ACPI_NAME_SIZE + 1]; + char local_signature[ACPI_NAMESEG_SIZE + 1]; u32 instance; struct acpi_table_header *table; acpi_physical_address address; acpi_status status; int table_status; - if (strlen(signature) != ACPI_NAME_SIZE) { + if (strlen(signature) != ACPI_NAMESEG_SIZE) { fprintf(stderr, "Invalid table signature [%s]: must be exactly 4 characters\n", signature); diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 8abc8f998abd7..a42cfcaa32938 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -97,7 +97,7 @@ int ap_open_output_file(char *pathname) int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) { - char filename[ACPI_NAME_SIZE + 16]; + char filename[ACPI_NAMESEG_SIZE + 16]; char instance_str[16]; ACPI_FILE file; acpi_size actual; @@ -119,7 +119,7 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) filename[1] = (char)tolower((int)filename[1]); filename[2] = (char)tolower((int)filename[2]); filename[3] = (char)tolower((int)filename[3]); - filename[ACPI_NAME_SIZE] = 0; + filename[ACPI_NAMESEG_SIZE] = 0; /* Handle multiple SSDts - create different filenames for each */ -- GitLab From f49c90e8958ef2745224e36c3158ead27289d645 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Mon, 8 Apr 2019 13:42:27 -0700 Subject: [PATCH 0826/1861] ACPICA: utilities: fix spelling of PCC to platform_comm_channel ACPICA commit 5e5c349e73982aea5d9f74416c0b2eea1b0767a1 Link: 
https://github.com/acpica/acpica/commit/5e5c349e Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utdecode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index 76e054c83c396..65beaa2376692 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -78,7 +78,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = { "IPMI", /* 0x07 */ "GeneralPurposeIo", /* 0x08 */ "GenericSerialBus", /* 0x09 */ - "PCC" /* 0x0A */ + "PlatformCommChannel" /* 0x0A */ }; const char *acpi_ut_get_region_name(u8 space_id) -- GitLab From 985d5124bfb0773edb8353237e5e8ddb614c3442 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 8 Apr 2019 13:42:28 -0700 Subject: [PATCH 0827/1861] ACPICA: Update version to 20190329 ACPICA commit 802ec363be67580f52251219ef90613008495392 Version 20190329. Link: https://github.com/acpica/acpica/commit/802ec363 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 24dbb4e742a68..60b4067391e37 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20190215 +#define ACPI_CA_VERSION 0x20190329 #include #include -- GitLab From df9271d69f4016c1169699de9a826745688103bd Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Mon, 8 Apr 2019 13:42:29 -0700 Subject: [PATCH 0828/1861] ACPICA: Namespace: add check to avoid null pointer dereference ACPICA commit 7586a625f9c34c3169efd88470192bf63119e31a Some ACPICA userspace tools call acpi_ut_subsystem_shutdown() during cleanup and dereference a null pointer when cleaning up the namespace. Link: https://github.com/acpica/acpica/commit/7586a625 Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsalloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/acpica/nsalloc.c b/drivers/acpi/acpica/nsalloc.c index 5470213b8e645..6eb63db72249b 100644 --- a/drivers/acpi/acpica/nsalloc.c +++ b/drivers/acpi/acpica/nsalloc.c @@ -74,6 +74,10 @@ void acpi_ns_delete_node(struct acpi_namespace_node *node) ACPI_FUNCTION_NAME(ns_delete_node); + if (!node) { + return_VOID; + } + /* Detach an object if there is one */ acpi_ns_detach_object(node); -- GitLab From 6c6a828f86d67c157c7a2b26d46ebca0ed0d9c47 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 8 Apr 2019 13:42:30 -0700 Subject: [PATCH 0829/1861] ACPICA: Update version to 20190405 ACPICA commit 0c1495275a35071b957ab59549761c6cd3f413b7 Version 20190405. Link: https://github.com/acpica/acpica/commit/0c149527 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 60b4067391e37..3b1b1d0e4c33f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20190329 +#define ACPI_CA_VERSION 0x20190405 #include #include -- GitLab From fcccc5c838c1999785a5aeb9e3bdcd00957f3e15 Mon Sep 17 00:00:00 2001 From: Kyle Lin Date: Tue, 9 Apr 2019 16:43:04 +0800 Subject: [PATCH 0830/1861] cpufreq: stats: Use lock by stat to replace global spin lock Stats are updated by each policy; using a per-stats lock instead of a single global one reduces contention. Signed-off-by: Kyle Lin Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index e2db5581489a0..08b192eb22c6e 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -14,7 +14,6 @@ #include #include -static DEFINE_SPINLOCK(cpufreq_stats_lock); struct cpufreq_stats { unsigned int total_trans; @@ -23,6 +22,7 @@ struct cpufreq_stats { unsigned int state_num; unsigned int last_index; u64 *time_in_state; + spinlock_t lock; unsigned int *freq_table; unsigned int *trans_table; }; @@ -39,12 +39,12 @@ static void cpufreq_stats_clear_table(struct cpufreq_stats *stats) { unsigned int count = stats->max_state; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); memset(stats->time_in_state, 0, count * sizeof(u64)); memset(stats->trans_table, 0, count * count * sizeof(int)); stats->last_time = get_jiffies_64(); stats->total_trans = 0; - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); } static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf) @@ -62,9 +62,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) if (policy->fast_switch_enabled) return 0; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); cpufreq_stats_update(stats); - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); for (i = 0; i < stats->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], @@ -211,6 +211,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy) stats->state_num = i; stats->last_time = get_jiffies_64(); stats->last_index = freq_table_get_index(stats, policy->cur); + spin_lock_init(&stats->lock); policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); @@ -242,11 +243,11 @@ void cpufreq_stats_record_transition(struct cpufreq_policy *policy, if (old_index == -1 || new_index == -1 || old_index == new_index) return; - spin_lock(&cpufreq_stats_lock); + spin_lock(&stats->lock); cpufreq_stats_update(stats); stats->last_index = new_index; stats->trans_table[old_index * stats->max_state + new_index]++; stats->total_trans++; - spin_unlock(&cpufreq_stats_lock); + spin_unlock(&stats->lock); } -- GitLab From edf072d36dbfdf74465b66988f30084b6c996fbf Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:07 +0100 Subject: [PATCH 0831/1861] arm64: Makefile: Replace -pg with CC_FLAGS_FTRACE In preparation for arm64 supporting ftrace built with other compiler options, let's have the arm64 Makefiles remove the $(CC_FLAGS_FTRACE) flags, whatever these may be, rather than assuming '-pg'. There should be no functional change as a result of this patch.
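(For illustration only, a hypothetical sketch rather than part of this patch: whatever CC_FLAGS_FTRACE expands to, '-pg' on most toolchains, makes the compiler plant a profiling call at every function entry, which is why the tracer's own objects opt out via the CFLAGS_REMOVE_* lines in the hunks that follow.)

/* Roughly what an instrumented function compiles to under -pg: */
void some_traced_function(void)
{
	_mcount();	/* profiling call inserted by the compiler */
	/* ... normal function body ... */
}

/*
 * If ftrace.o, insn.o or return_address.o were built with these flags,
 * the tracer would end up tracing itself and recurse; stripping
 * $(CC_FLAGS_FTRACE) from those objects breaks the cycle.
 */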
Reviewed-by: Mark Rutland Signed-off-by: Torsten Duwe Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 6 +++--- arch/arm64/lib/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cd434d0719c1c..a30bbecb23d82 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) -CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_insn.o = -pg -CFLAGS_REMOVE_return_address.o = -pg +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_insn.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 5540a1638baf7..33c2a4abda046 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -24,7 +24,7 @@ CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ -fcall-saved-x18 -fomit-frame-pointer -CFLAGS_REMOVE_atomic_ll_sc.o := -pg +CFLAGS_REMOVE_atomic_ll_sc.o := $(CC_FLAGS_FTRACE) GCOV_PROFILE_atomic_ll_sc.o := n KASAN_SANITIZE_atomic_ll_sc.o := n KCOV_INSTRUMENT_atomic_ll_sc.o := n -- GitLab From e1a7eafb7350ff118e7538aca76b3f79e9280a8f Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:11 +0100 Subject: [PATCH 0832/1861] efi/arm/arm64: Makefile: Replace -pg with CC_FLAGS_FTRACE In preparation for arm64 supporting ftrace built with other compiler options, let's have Makefiles remove the $(CC_FLAGS_FTRACE) flags, whatever these may be, rather than assuming '-pg'. While at it, fix arm32 as well. There should be no functional change as a result of this patch. Reviewed-by: Mark Rutland Acked-by: Ard Biesheuvel Signed-off-by: Torsten Duwe Signed-off-by: Will Deacon --- drivers/firmware/efi/libstub/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b0103e16fc1b9..645269c3906d8 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -16,9 +16,9 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ # arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly # disable the stackleak plugin -cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie \ - $(DISABLE_STACKLEAK_PLUGIN) -cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ +cflags-$(CONFIG_ARM64) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ + -fpie $(DISABLE_STACKLEAK_PLUGIN) +cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -- GitLab From e2092740b72384e95b9c8a418536b35d628a1642 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:14 +0100 Subject: [PATCH 0833/1861] kasan: Makefile: Replace -pg with CC_FLAGS_FTRACE In preparation for arm64 supporting ftrace built with other compiler options, let's have Makefiles remove the $(CC_FLAGS_FTRACE) flags, whatever these may be, rather than assuming '-pg'. There should be no functional change as a result of this patch.
Reviewed-by: Mark Rutland Acked-by: Andrey Ryabinin Signed-off-by: Torsten Duwe Signed-off-by: Will Deacon --- mm/kasan/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 5d1065efbd476..f06ee820d3560 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -5,9 +5,9 @@ UBSAN_SANITIZE_generic.o := n UBSAN_SANITIZE_tags.o := n KCOV_INSTRUMENT := n -CFLAGS_REMOVE_common.o = -pg -CFLAGS_REMOVE_generic.o = -pg -CFLAGS_REMOVE_tags.o = -pg +CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE) # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 -- GitLab From 321b628e6f5a3af999f75eadd373adbcb8b4cb1f Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:06 +0800 Subject: [PATCH 0834/1861] drm/mediatek: make implementation of recalc_rate() for MT2701 hdmi phy Recalculate the rate of this clock, by querying hardware to make implementation of recalc_rate() to match the definition. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 8 ---- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 2 - .../gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 38 +++++++++++++++++-- .../gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 8 ++++ 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index 4ef9c57ffd44d..efc400ebbb90b 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -29,14 +29,6 @@ long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, return rate; } -unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - - return hdmi_phy->pll_rate; -} - void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 bits) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index f39b1fc666129..71430691ffe43 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -50,8 +50,6 @@ void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset, struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw); long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate); -unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate); extern struct platform_driver mtk_hdmi_phy_driver; extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf; diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index 0746fc8877069..feb6a7ed63d16 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -79,7 +79,6 @@ static int mtk_hdmi_pll_prepare(struct clk_hw *hw) mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); usleep_range(80, 100); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); @@ -94,7 +93,6 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) 
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); usleep_range(80, 100); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); @@ -123,6 +121,7 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_PREDIV_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON6, RG_HTPLL_POSDIV_MASK); + mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IC), RG_HTPLL_IC_MASK); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (0x1 << RG_HTPLL_IR), @@ -154,6 +153,39 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + unsigned long out_rate, val; + + val = (readl(hdmi_phy->regs + HDMI_CON6) + & RG_HTPLL_PREDIV_MASK) >> RG_HTPLL_PREDIV; + switch (val) { + case 0x00: + out_rate = parent_rate; + break; + case 0x01: + out_rate = parent_rate / 2; + break; + default: + out_rate = parent_rate / 4; + break; + } + + val = (readl(hdmi_phy->regs + HDMI_CON6) + & RG_HTPLL_FBKDIV_MASK) >> RG_HTPLL_FBKDIV; + out_rate *= (val + 1) * 2; + val = (readl(hdmi_phy->regs + HDMI_CON2) + & RG_HDMITX_TX_POSDIV_MASK); + out_rate >>= (val >> RG_HDMITX_TX_POSDIV); + + if (readl(hdmi_phy->regs + HDMI_CON2) & RG_HDMITX_EN_TX_POSDIV) + out_rate /= 5; + + return out_rate; +} + static const struct clk_ops mtk_hdmi_phy_pll_ops = { .prepare = mtk_hdmi_pll_prepare, .unprepare = mtk_hdmi_pll_unprepare, @@ -174,7 +206,6 @@ static void mtk_hdmi_phy_enable_tmds(struct mtk_hdmi_phy *hdmi_phy) mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); usleep_range(80, 100); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); - mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); @@ -186,7 +217,6 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_DRV_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_PRED_MASK); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SER_MASK); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_EN_TX_POSDIV); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON2, RG_HDMITX_MBIAS_LPF_EN); usleep_range(80, 100); mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_EN_SLDO_MASK); diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index ed5916b276584..83662a2084916 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -285,6 +285,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +static unsigned long mtk_hdmi_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + + return hdmi_phy->pll_rate; +} + static const struct clk_ops mtk_hdmi_phy_pll_ops = 
{ .prepare = mtk_hdmi_pll_prepare, .unprepare = mtk_hdmi_pll_unprepare, -- GitLab From 827abdd024207146822f66ba3ba74867135866b9 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:03 +0800 Subject: [PATCH 0835/1861] drm/mediatek: remove flag CLK_SET_RATE_PARENT for MT2701 hdmi phy This is the first step to make MT2701 HDMI stable. The parent rate of the HDMI PHY is set by the DPI driver. We should not set or change the parent rate from the MT2701 HDMI PHY, so remove the "CLK_SET_RATE_PARENT" flag from its clock. Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 13 +++++-------- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 1 + drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 1 + drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index efc400ebbb90b..08b029772c5a5 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -102,13 +102,11 @@ mtk_hdmi_phy_dev_get_ops(const struct mtk_hdmi_phy *hdmi_phy) return NULL; } -static void mtk_hdmi_phy_clk_get_ops(struct mtk_hdmi_phy *hdmi_phy, - const struct clk_ops **ops) +static void mtk_hdmi_phy_clk_get_data(struct mtk_hdmi_phy *hdmi_phy, + struct clk_init_data *clk_init) { - if (hdmi_phy && hdmi_phy->conf && hdmi_phy->conf->hdmi_phy_clk_ops) - *ops = hdmi_phy->conf->hdmi_phy_clk_ops; - else - dev_err(hdmi_phy->dev, "Failed to get clk ops of phy\n"); + clk_init->flags = hdmi_phy->conf->flags; + clk_init->ops = hdmi_phy->conf->hdmi_phy_clk_ops; } static int mtk_hdmi_phy_probe(struct platform_device *pdev) @@ -121,7 +119,6 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) struct clk_init_data clk_init = { .num_parents = 1, .parent_names = (const char * const *)&ref_clk_name, - .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, }; struct phy *phy; @@ -159,7 +156,7 @@ static int mtk_hdmi_phy_probe(struct platform_device *pdev) hdmi_phy->dev = dev; hdmi_phy->conf = (struct mtk_hdmi_phy_conf *)of_device_get_match_data(dev); - mtk_hdmi_phy_clk_get_ops(hdmi_phy, &clk_init.ops); + mtk_hdmi_phy_clk_get_data(hdmi_phy, &clk_init); hdmi_phy->pll_hw.init = &clk_init; hdmi_phy->pll = devm_clk_register(dev, &hdmi_phy->pll_hw); if (IS_ERR(hdmi_phy->pll)) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index 71430691ffe43..d28b8d5ed2b44 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -21,6 +21,7 @@ struct mtk_hdmi_phy; struct mtk_hdmi_phy_conf { bool tz_disabled; + unsigned long flags; const struct clk_ops *hdmi_phy_clk_ops; void (*hdmi_phy_enable_tmds)(struct mtk_hdmi_phy *hdmi_phy); void (*hdmi_phy_disable_tmds)(struct mtk_hdmi_phy *hdmi_phy); diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index feb6a7ed63d16..31f3175f032bc 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -232,6 +232,7 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) struct mtk_hdmi_phy_conf mtk_hdmi_phy_2701_conf = { .tz_disabled = true, + .flags = CLK_SET_RATE_GATE, .hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, diff --git
a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 83662a2084916..37f9503d76433 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -317,6 +317,7 @@ static void mtk_hdmi_phy_disable_tmds(struct mtk_hdmi_phy *hdmi_phy) } struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf = { + .flags = CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, .hdmi_phy_clk_ops = &mtk_hdmi_phy_pll_ops, .hdmi_phy_enable_tmds = mtk_hdmi_phy_enable_tmds, .hdmi_phy_disable_tmds = mtk_hdmi_phy_disable_tmds, -- GitLab From 8eeb3946feeb00486ac0909e2309da87db8988a5 Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:05 +0800 Subject: [PATCH 0836/1861] drm/mediatek: using new factor for tvdpll for MT2701 hdmi phy This is the second step to make MT2701 HDMI stable. The factor depends on the DPI divider on MT2701; therefore, fix this factor to the correct new value. Test: search ok Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_dpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 22e68a100e7be..5d333138f9136 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -662,13 +662,11 @@ static unsigned int mt8173_calculate_factor(int clock) static unsigned int mt2701_calculate_factor(int clock) { if (clock <= 64000) - return 16; - else if (clock <= 128000) - return 8; - else if (clock <= 256000) return 4; - else + else if (clock <= 128000) return 2; + else + return 1; } static const struct mtk_dpi_conf mt8173_conf = { -- GitLab From 9ee76098a1b8ae21cccac641b735ee4d3a77bccf Mon Sep 17 00:00:00 2001 From: Wangyan Wang Date: Tue, 9 Apr 2019 14:53:07 +0800 Subject: [PATCH 0837/1861] drm/mediatek: no change parent rate in round_rate() for MT2701 hdmi phy This is the third step to make MT2701 HDMI stable. We should not change the parent's rate when rounding the rate of this clock: the parent clock of the HDMI PHY must run at the same rate as the PHY itself. We change it only when doing set_rate().
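(For illustration, a hypothetical consumer rather than code from this series: with CLK_SET_RATE_PARENT gone and round_rate() leaving *parent_rate alone, rounding becomes a pure query and only an explicit clk_set_rate() reprograms the PLL. The clock name below is assumed.)

struct clk *pll = devm_clk_get(dev, "pll_hdmi");	/* hypothetical handle */
long rounded = clk_round_rate(pll, 148500000);		/* query only, parent untouched */

if (rounded > 0)
	clk_set_rate(pll, rounded);			/* reprograms the PLL itself */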
Signed-off-by: Wangyan Wang Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_phy.c | 14 -------------- drivers/gpu/drm/mediatek/mtk_hdmi_phy.h | 2 -- drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c | 6 ++++++ drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 14 ++++++++++++++ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c index 08b029772c5a5..5223498502c49 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.c @@ -15,20 +15,6 @@ static const struct phy_ops mtk_hdmi_phy_dev_ops = { .owner = THIS_MODULE, }; -long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); - - hdmi_phy->pll_rate = rate; - if (rate <= 74250000) - *parent_rate = rate; - else - *parent_rate = rate / 2; - - return rate; -} - void mtk_hdmi_phy_clear_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 bits) { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h index d28b8d5ed2b44..2d8b3182470dc 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_phy.h @@ -49,8 +49,6 @@ void mtk_hdmi_phy_set_bits(struct mtk_hdmi_phy *hdmi_phy, u32 offset, void mtk_hdmi_phy_mask(struct mtk_hdmi_phy *hdmi_phy, u32 offset, u32 val, u32 mask); struct mtk_hdmi_phy *to_mtk_hdmi_phy(struct clk_hw *hw); -long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate); extern struct platform_driver mtk_hdmi_phy_driver; extern struct mtk_hdmi_phy_conf mtk_hdmi_phy_8173_conf; diff --git a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c index 31f3175f032bc..d3cc4022e9884 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt2701_hdmi_phy.c @@ -106,6 +106,12 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) usleep_range(80, 100); } +static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + return rate; +} + static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 37f9503d76433..47f8a29516822 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -199,6 +199,20 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw) usleep_range(100, 150); } +static long mtk_hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); + + hdmi_phy->pll_rate = rate; + if (rate <= 74250000) + *parent_rate = rate; + else + *parent_rate = rate / 2; + + return rate; +} + static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { -- GitLab From c4a586fdd440931c2773a9da42c1fc564068f128 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 15 Mar 2019 23:37:51 -0400 Subject: [PATCH 0838/1861] PM / core: fix kerneldoc comment for dpm_watchdog_handler() This brings the kernel doc in line with the function signature. Signed-off-by: Yangtao Li Acked-by: Pavel Machek Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f80d298de3fa4..497238704abd8 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -478,7 +478,7 @@ struct dpm_watchdog { /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. - * @data: Watchdog object address. + * @t: The timer that PM watchdog depends on. * * Called when a driver has timed out suspending or resuming. * There's not much we can do here to recover so panic() to -- GitLab From 0b237cb2fc7b10b2c4e92477760fa7442847c804 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 15 Mar 2019 23:48:41 -0400 Subject: [PATCH 0839/1861] PM / core: fix kerneldoc comment for device_pm_wait_for_dev() Rearrange comment to make the comment style consistent, the previous function parameters are described first. Signed-off-by: Yangtao Li Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 497238704abd8..41eba82ee7b9d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -2069,8 +2069,8 @@ EXPORT_SYMBOL_GPL(__suspend_report_result); /** * device_pm_wait_for_dev - Wait for suspend/resume of a device to complete. - * @dev: Device to wait for. * @subordinate: Device that needs to wait for @dev. + * @dev: Device to wait for. */ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) { -- GitLab From 52c6d145da15a9a9cea14678be307c127ccc6ef5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Feb 2019 12:06:43 +0000 Subject: [PATCH 0840/1861] arm64: debug: Remove unused return value from do_debug_exception() do_debug_exception() goes out of its way to return a value that isn't ever used, so just make the thing void. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4f343e603925d..0cb0e09995e11 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -810,13 +810,12 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } -asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, + unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); - int rv; /* * Tell lockdep we disabled irqs in entry.S. 
Do nothing if they were @@ -828,17 +827,12 @@ asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, if (user_mode(regs) && !is_ttbr0_addr(pc)) arm64_apply_bp_hardening(); - if (!inf->fn(addr_if_watchpoint, esr, regs)) { - rv = 1; - } else { + if (inf->fn(addr_if_watchpoint, esr, regs)) { arm64_notify_die(inf->name, regs, inf->sig, inf->code, (void __user *)pc, esr); - rv = 0; } if (interrupts_enabled(regs)) trace_hardirqs_on(); - - return rv; } NOKPROBE_SYMBOL(do_debug_exception); -- GitLab From 5a9132add862f446d5f0d7fa7468887108b5898a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Feb 2019 12:42:26 +0000 Subject: [PATCH 0841/1861] arm64: debug: Rename addr parameter for non-watchpoint exception hooks Since the 'addr' parameter contains an UNKNOWN value for non-watchpoint debug exceptions, rename it to 'unused' for those hooks so we don't get tempted to use it in the future. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d7bb6aefae0a8..c4c263d0cf0ff 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -222,7 +222,7 @@ static void send_user_sigtrap(int si_code) "User debug trap"); } -static int single_step_handler(unsigned long addr, unsigned int esr, +static int single_step_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { bool handler_found = false; @@ -302,7 +302,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) } NOKPROBE_SYMBOL(call_break_hook); -static int brk_handler(unsigned long addr, unsigned int esr, +static int brk_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { bool handler_found = false; -- GitLab From cb764a69fa41179fb222b53b1a33a9d7373f9249 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Feb 2019 13:22:17 +0000 Subject: [PATCH 0842/1861] arm64: debug: Remove meaningless comment The comment next to the definition of our 'break_hook' list head is at best wrong but mainly just meaningless. Rip it out. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c4c263d0cf0ff..744229d10ca8d 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -264,11 +264,6 @@ static int single_step_handler(unsigned long unused, unsigned int esr, } NOKPROBE_SYMBOL(single_step_handler); -/* - * Breakpoint handler is re-entrant as another breakpoint can - * hit within breakpoint handler, especically in kprobes. - * Use reader/writer locks instead of plain spinlock. - */ static LIST_HEAD(break_hook); static DEFINE_SPINLOCK(break_hook_lock); -- GitLab From 26a04d84bc5311d7785b229b353f327e866ab61a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 12:52:47 +0000 Subject: [PATCH 0843/1861] arm64: debug: Separate debug hooks based on target exception level Mixing kernel and user debug hooks together is highly error-prone as it relies on all of the hooks to figure out whether the exception came from kernel or user, and then to act accordingly. 
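(For illustration, a sketch of the error-prone pattern; this is hypothetical, not any specific hook:)

static int fragile_brk_hook(struct pt_regs *regs, unsigned int esr)
{
	if (user_mode(regs))		/* every hook had to remember this */
		return DBG_HOOK_ERROR;	/* forget it and user BRKs get
					 * handled as kernel ones */
	/* ... kernel-only handling ... */
	return DBG_HOOK_HANDLED;
}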
Make our debug hook code a little more robust by maintaining separate hook lists for user and kernel, with separate registration functions to force callers to be explicit about the exception levels that they care about. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/brk-imm.h | 1 + arch/arm64/include/asm/debug-monitors.h | 18 ++++-- arch/arm64/kernel/debug-monitors.c | 85 +++++++++++++++++-------- arch/arm64/kernel/kgdb.c | 22 +++---- arch/arm64/kernel/probes/uprobes.c | 7 +- arch/arm64/kernel/traps.c | 20 +++--- 6 files changed, 95 insertions(+), 58 deletions(-) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index 2945fe6cd863c..fec9e13846410 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -23,5 +23,6 @@ #define KGDB_COMPILED_DBG_BRK_IMM 0x401 #define BUG_BRK_IMM 0x800 #define KASAN_BRK_IMM 0x900 +#define KASAN_BRK_MASK 0x0ff #endif diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index a44cf52254296..7d37cfa5cc166 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -94,18 +94,24 @@ struct step_hook { int (*fn)(struct pt_regs *regs, unsigned int esr); }; -void register_step_hook(struct step_hook *hook); -void unregister_step_hook(struct step_hook *hook); +void register_user_step_hook(struct step_hook *hook); +void unregister_user_step_hook(struct step_hook *hook); + +void register_kernel_step_hook(struct step_hook *hook); +void unregister_kernel_step_hook(struct step_hook *hook); struct break_hook { struct list_head node; - u32 esr_val; - u32 esr_mask; int (*fn)(struct pt_regs *regs, unsigned int esr); + u16 imm; + u16 mask; /* These bits are ignored when comparing with imm */ }; -void register_break_hook(struct break_hook *hook); -void unregister_break_hook(struct break_hook *hook); +void register_user_break_hook(struct break_hook *hook); +void unregister_user_break_hook(struct break_hook *hook); + +void register_kernel_break_hook(struct break_hook *hook); +void unregister_kernel_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 744229d10ca8d..9b3fd7fa5b436 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -163,25 +163,46 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) } NOKPROBE_SYMBOL(clear_regs_spsr_ss); -/* EL1 Single Step Handler hooks */ -static LIST_HEAD(step_hook); -static DEFINE_SPINLOCK(step_hook_lock); +static DEFINE_SPINLOCK(debug_hook_lock); +static LIST_HEAD(user_step_hook); +static LIST_HEAD(kernel_step_hook); -void register_step_hook(struct step_hook *hook) +static void register_debug_hook(struct list_head *node, struct list_head *list) { - spin_lock(&step_hook_lock); - list_add_rcu(&hook->node, &step_hook); - spin_unlock(&step_hook_lock); + spin_lock(&debug_hook_lock); + list_add_rcu(node, list); + spin_unlock(&debug_hook_lock); + } -void unregister_step_hook(struct step_hook *hook) +static void unregister_debug_hook(struct list_head *node) { - spin_lock(&step_hook_lock); - list_del_rcu(&hook->node); - spin_unlock(&step_hook_lock); + spin_lock(&debug_hook_lock); + list_del_rcu(node); + spin_unlock(&debug_hook_lock); synchronize_rcu(); } +void register_user_step_hook(struct step_hook *hook) +{ + register_debug_hook(&hook->node, &user_step_hook); +} + +void unregister_user_step_hook(struct step_hook *hook) +{ + 
unregister_debug_hook(&hook->node); +} + +void register_kernel_step_hook(struct step_hook *hook) +{ + register_debug_hook(&hook->node, &kernel_step_hook); +} + +void unregister_kernel_step_hook(struct step_hook *hook) +{ + unregister_debug_hook(&hook->node); +} + /* * Call registered single step handlers * There is no Syndrome info to check for determining the handler. @@ -191,11 +212,14 @@ void unregister_step_hook(struct step_hook *hook) static int call_step_hook(struct pt_regs *regs, unsigned int esr) { struct step_hook *hook; + struct list_head *list; int retval = DBG_HOOK_ERROR; + list = user_mode(regs) ? &user_step_hook : &kernel_step_hook; + rcu_read_lock(); - list_for_each_entry_rcu(hook, &step_hook, node) { + list_for_each_entry_rcu(hook, list, node) { retval = hook->fn(regs, esr); if (retval == DBG_HOOK_HANDLED) break; @@ -264,33 +288,44 @@ static int single_step_handler(unsigned long unused, unsigned int esr, } NOKPROBE_SYMBOL(single_step_handler); -static LIST_HEAD(break_hook); -static DEFINE_SPINLOCK(break_hook_lock); +static LIST_HEAD(user_break_hook); +static LIST_HEAD(kernel_break_hook); -void register_break_hook(struct break_hook *hook) +void register_user_break_hook(struct break_hook *hook) { - spin_lock(&break_hook_lock); - list_add_rcu(&hook->node, &break_hook); - spin_unlock(&break_hook_lock); + register_debug_hook(&hook->node, &user_break_hook); } -void unregister_break_hook(struct break_hook *hook) +void unregister_user_break_hook(struct break_hook *hook) { - spin_lock(&break_hook_lock); - list_del_rcu(&hook->node); - spin_unlock(&break_hook_lock); - synchronize_rcu(); + unregister_debug_hook(&hook->node); +} + +void register_kernel_break_hook(struct break_hook *hook) +{ + register_debug_hook(&hook->node, &kernel_break_hook); +} + +void unregister_kernel_break_hook(struct break_hook *hook) +{ + unregister_debug_hook(&hook->node); } static int call_break_hook(struct pt_regs *regs, unsigned int esr) { struct break_hook *hook; + struct list_head *list; int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; + rcu_read_lock(); - list_for_each_entry_rcu(hook, &break_hook, node) - if ((esr & hook->esr_mask) == hook->esr_val) + list_for_each_entry_rcu(hook, list, node) { + unsigned int comment = esr & BRK64_ESR_MASK; + + if ((comment & ~hook->mask) == hook->imm) fn = hook->fn; + } rcu_read_unlock(); return fn ? 
fn(regs, esr) : DBG_HOOK_ERROR; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 691854b77c7fe..4c01f299aeb21 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -275,15 +275,13 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { - .esr_mask = 0xffffffff, - .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_DYN_DBG_BRK_IMM), - .fn = kgdb_brk_fn + .fn = kgdb_brk_fn, + .imm = KGDB_DYN_DBG_BRK_IMM, }; static struct break_hook kgdb_compiled_brkpt_hook = { - .esr_mask = 0xffffffff, - .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_COMPILED_DBG_BRK_IMM), - .fn = kgdb_compiled_brk_fn + .fn = kgdb_compiled_brk_fn, + .imm = KGDB_COMPILED_DBG_BRK_IMM, }; static struct step_hook kgdb_step_hook = { @@ -332,9 +330,9 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_break_hook(&kgdb_brkpt_hook); - register_break_hook(&kgdb_compiled_brkpt_hook); - register_step_hook(&kgdb_step_hook); + register_kernel_break_hook(&kgdb_brkpt_hook); + register_kernel_break_hook(&kgdb_compiled_brkpt_hook); + register_kernel_step_hook(&kgdb_step_hook); return 0; } @@ -345,9 +343,9 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_break_hook(&kgdb_brkpt_hook); - unregister_break_hook(&kgdb_compiled_brkpt_hook); - unregister_step_hook(&kgdb_step_hook); + unregister_kernel_break_hook(&kgdb_brkpt_hook); + unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook); + unregister_kernel_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 636ca0119c0ef..7d6ea88796a6b 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -195,8 +195,7 @@ static int uprobe_single_step_handler(struct pt_regs *regs, /* uprobe breakpoint handler hook */ static struct break_hook uprobes_break_hook = { - .esr_mask = BRK64_ESR_MASK, - .esr_val = BRK64_ESR_UPROBES, + .imm = BRK64_ESR_UPROBES, .fn = uprobe_breakpoint_handler, }; @@ -207,8 +206,8 @@ static struct step_hook uprobes_step_hook = { static int __init arch_init_uprobes(void) { - register_break_hook(&uprobes_break_hook); - register_step_hook(&uprobes_step_hook); + register_user_break_hook(&uprobes_break_hook); + register_user_step_hook(&uprobes_step_hook); return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8ad119c3f665d..85fdc3d7c5566 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -969,9 +969,8 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) } static struct break_hook bug_break_hook = { - .esr_val = 0xf2000000 | BUG_BRK_IMM, - .esr_mask = 0xffffffff, .fn = bug_handler, + .imm = BUG_BRK_IMM, }; #ifdef CONFIG_KASAN_SW_TAGS @@ -1016,13 +1015,10 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } -#define KASAN_ESR_VAL (0xf2000000 | KASAN_BRK_IMM) -#define KASAN_ESR_MASK 0xffffff00 - static struct break_hook kasan_break_hook = { - .esr_val = KASAN_ESR_VAL, - .esr_mask = KASAN_ESR_MASK, - .fn = kasan_handler, + .fn = kasan_handler, + .imm = KASAN_BRK_IMM, + .mask = KASAN_BRK_MASK, }; #endif @@ -1034,7 +1030,9 @@ int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs) { #ifdef CONFIG_KASAN_SW_TAGS - if ((esr & KASAN_ESR_MASK) == KASAN_ESR_VAL) + unsigned int comment = esr & BRK64_ESR_MASK; + + if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return 
kasan_handler(regs, esr) != DBG_HOOK_HANDLED; #endif return bug_handler(regs, esr) != DBG_HOOK_HANDLED; @@ -1043,8 +1041,8 @@ int __init early_brk64(unsigned long addr, unsigned int esr, /* This registration must happen early, before debug_traps_init(). */ void __init trap_init(void) { - register_break_hook(&bug_break_hook); + register_kernel_break_hook(&bug_break_hook); #ifdef CONFIG_KASAN_SW_TAGS - register_break_hook(&kasan_break_hook); + register_kernel_break_hook(&kasan_break_hook); #endif } -- GitLab From a22d570aee77ae626e4d3532478ae8058a24bdb3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 14:35:00 +0000 Subject: [PATCH 0844/1861] arm64: kprobes: Avoid calling kprobes debug handlers explicitly Kprobes bypasses our debug hook registration code so that it doesn't get tangled up with recursive debug exceptions from things like lockdep: http://lists.infradead.org/pipermail/linux-arm-kernel/2015-February/324385.html However, since then, (a) the hook list has become RCU protected and (b) the kprobes hooks were found not to filter out exceptions from userspace correctly. On top of that, the step handler is invoked directly from single_step_handler(), which *does* use the debug hook list, so it's clearly not the end of the world. For now, have kprobes use the debug hook registration API like everybody else. We can revisit this in the future if this is found to limit coverage significantly. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/kprobes.h | 2 -- arch/arm64/kernel/debug-monitors.c | 10 ---------- arch/arm64/kernel/probes/kprobes.c | 16 ++++++++++++++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index d5a44cf859e94..21721fbf44e74 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -54,8 +54,6 @@ void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); -int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); void kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3fd7fa5b436..f4d8cda8830d8 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -258,10 +258,6 @@ static int single_step_handler(unsigned long unused, unsigned int esr, if (!reinstall_suspended_bps(regs)) return 0; -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; -#endif if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED) handler_found = true; @@ -337,12 +333,6 @@ static int brk_handler(unsigned long unused, unsigned int esr, { bool handler_found = false; -#ifdef CONFIG_KPROBES - if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { - if (kprobe_breakpoint_handler(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; - } -#endif if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED) handler_found = true; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 7a679caf45856..baf97f47aec0a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -439,7 +439,7 @@ kprobe_ss_hit(struct kprobe_ctlblk 
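(A minimal usage sketch of the registration API being adopted here; the handler and immediate below are hypothetical:)

static int example_brk_handler(struct pt_regs *regs, unsigned int esr)
{
	/* consume the trap */
	return DBG_HOOK_HANDLED;
}

static struct break_hook example_break_hook = {
	.fn  = example_brk_handler,
	.imm = 0x404,	/* hypothetical immediate from the 0x400 - 0x7ff range */
};

/* at init time, registration selects the target exception level: */
register_kernel_break_hook(&example_break_hook);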
*kcb, unsigned long addr) return DBG_HOOK_ERROR; } -int __kprobes +static int __kprobes kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -461,7 +461,11 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) return retval; } -int __kprobes +static struct step_hook kprobes_step_hook = { + .fn = kprobe_single_step_handler, +}; + +static int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { if (user_mode(regs)) @@ -471,6 +475,11 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } +static struct break_hook kprobes_break_hook = { + .imm = BRK64_ESR_KPROBES, + .fn = kprobe_breakpoint_handler, +}; + /* * Provide a blacklist of symbols identifying ranges which cannot be kprobed. * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). @@ -599,5 +608,8 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { + register_kernel_break_hook(&kprobes_break_hook); + register_kernel_step_hook(&kprobes_step_hook); + return 0; } -- GitLab From fb610f2a2006322bebeb30408fefce6a01df09ea Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 15:37:09 +0000 Subject: [PATCH 0845/1861] arm64: debug: Remove redundant user_mode(regs) checks from debug handlers Now that the debug hook dispatching code takes the triggering exception level into account, there's no need for the hooks themselves to poke around with user_mode(regs). Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/kgdb.c | 8 +------- arch/arm64/kernel/probes/kprobes.c | 6 ------ arch/arm64/kernel/probes/uprobes.c | 12 ++++-------- arch/arm64/kernel/traps.c | 6 ------ 4 files changed, 5 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 4c01f299aeb21..30853d5b78592 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -244,9 +244,6 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } @@ -254,9 +251,6 @@ NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); @@ -266,7 +260,7 @@ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs) || !kgdb_single_step) + if (!kgdb_single_step) return DBG_HOOK_ERROR; kgdb_handle_exception(1, SIGTRAP, 0, regs); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index baf97f47aec0a..000f32d1a7564 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -445,9 +445,6 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; - if (user_mode(regs)) - return DBG_HOOK_ERROR; - /* return error if this is not our step */ retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); @@ -468,9 +465,6 @@ static struct step_hook kprobes_step_hook = { static int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - kprobe_handler(regs); return DBG_HOOK_HANDLED; } diff --git 
a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 7d6ea88796a6b..f37ab95676761 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -171,7 +171,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, static int uprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs) && uprobe_pre_sstep_notifier(regs)) + if (uprobe_pre_sstep_notifier(regs)) return DBG_HOOK_HANDLED; return DBG_HOOK_ERROR; @@ -182,13 +182,9 @@ static int uprobe_single_step_handler(struct pt_regs *regs, { struct uprobe_task *utask = current->utask; - if (user_mode(regs)) { - WARN_ON(utask && - (instruction_pointer(regs) != utask->xol_vaddr + 4)); - - if (uprobe_post_sstep_notifier(regs)) - return DBG_HOOK_HANDLED; - } + WARN_ON(utask && (instruction_pointer(regs) != utask->xol_vaddr + 4)); + if (uprobe_post_sstep_notifier(regs)) + return DBG_HOOK_HANDLED; return DBG_HOOK_ERROR; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 85fdc3d7c5566..091379744d2f1 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -947,9 +947,6 @@ int is_valid_bugaddr(unsigned long addr) static int bug_handler(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - switch (report_bug(regs->pc, regs)) { case BUG_TRAP_TYPE_BUG: die("Oops - BUG", regs, 0); @@ -988,9 +985,6 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr) u64 addr = regs->regs[0]; u64 pc = regs->pc; - if (user_mode(regs)) - return DBG_HOOK_ERROR; - kasan_report(addr, size, write, pc); /* -- GitLab From 453b7740ebfda2d84be7fb583c54f0c91c592869 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 15:06:42 +0000 Subject: [PATCH 0846/1861] arm64: probes: Move magic BRK values into brk-imm.h kprobes and uprobes reserve some BRK immediates for installing their probes. Define these along with the other reservations in brk-imm.h and rename the ESR definitions to be consistent with the others that we already have. 
Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/brk-imm.h | 4 ++++ arch/arm64/include/asm/debug-monitors.h | 7 ++----- arch/arm64/include/asm/esr.h | 4 +--- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/probes/kprobes.c | 2 +- arch/arm64/kernel/probes/uprobes.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index fec9e13846410..d84294064e6a0 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -11,6 +11,8 @@ /* * #imm16 values used for BRK instruction generation + * 0x004: for installing kprobes + * 0x005: for installing uprobes * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -18,6 +20,8 @@ * 0x800: kernel-mode BUG() and WARN() traps * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) */ +#define KPROBES_BRK_IMM 0x004 +#define UPROBES_BRK_IMM 0x005 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7d37cfa5cc166..0679f781696d4 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -65,12 +65,9 @@ #define CACHE_FLUSH_IS_SAFE 1 /* kprobes BRK opcodes with ESR encoding */ -#define BRK64_ESR_MASK 0xFFFF -#define BRK64_ESR_KPROBES 0x0004 -#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ -#define BRK64_ESR_UPROBES 0x0005 -#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (BRK64_ESR_UPROBES << 5)) +#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 52233f00d53d8..3541720189c95 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -156,9 +156,7 @@ ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ -#define ESR_ELx_VAL_BRK64(imm) \ - ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ - ((imm) & 0xffff)) +#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff /* ISS field definitions for System instruction traps */ #define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4d8cda8830d8..2692a0a27cf3a 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -317,7 +317,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) rcu_read_lock(); list_for_each_entry_rcu(hook, list, node) { - unsigned int comment = esr & BRK64_ESR_MASK; + unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~hook->mask) == hook->imm) fn = hook->fn; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 000f32d1a7564..2509fcb6d4048 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -470,7 +470,7 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) } static struct break_hook kprobes_break_hook = { - .imm = BRK64_ESR_KPROBES, + .imm = KPROBES_BRK_IMM, .fn = kprobe_breakpoint_handler, }; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c 
index f37ab95676761..605945eac1f84 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -191,7 +191,7 @@ static int uprobe_single_step_handler(struct pt_regs *regs, /* uprobe breakpoint handler hook */ static struct break_hook uprobes_break_hook = { - .imm = BRK64_ESR_UPROBES, + .imm = UPROBES_BRK_IMM, .fn = uprobe_breakpoint_handler, }; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 091379744d2f1..74598396e0bf4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -1024,7 +1024,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs) { #ifdef CONFIG_KASAN_SW_TAGS - unsigned int comment = esr & BRK64_ESR_MASK; + unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; -- GitLab From ab6211c90052435126ad1319e9223b68e154b9f0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 15:39:47 +0000 Subject: [PATCH 0847/1861] arm64: debug: Clean up brk_handler() brk_handler() now looks pretty strange and can be refactored to drop its funny 'handler_found' local variable altogether. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2692a0a27cf3a..800486cc48239 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -331,14 +331,12 @@ NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { - bool handler_found = false; - - if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; + if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; - if (!handler_found && user_mode(regs)) { + if (user_mode(regs)) { send_user_sigtrap(TRAP_BRKPT); - } else if (!handler_found) { + } else { pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } -- GitLab From 90292aca9854a2cbd904127337d6fea30f46290c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 11 Mar 2019 18:57:46 -0600 Subject: [PATCH 0848/1861] arm64: mm: use appropriate ctors for page tables For pte page, use pgtable_page_ctor(); for pmd page, use pgtable_pmd_page_ctor(); and for the rest (pud, p4d and pgd), don't use any. For now, we don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK and pgtable_pmd_page_ctor() is a nop. When we do in patch 3, we make sure pmd is not folded so we won't mistakenly call pgtable_pmd_page_ctor() on pud or p4d. 
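(Background sketch, simplified from core mm and not part of this patch: the ctor's job is to initialise the split page-table lock that generic helpers expect to find.)

/* roughly what pgtable_page_ctor() provides for a pte page: */
static bool example_pte_page_ctor(struct page *page)
{
	/*
	 * With ALLOC_SPLIT_PTLOCKS this allocates and initialises the
	 * per-page lock that helpers such as apply_to_page_range()
	 * later take via pte_lockptr(); the allocation is also why
	 * the ctor can fail.
	 */
	return ptlock_init(page);
}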
Acked-by: Mark Rutland Signed-off-by: Yu Zhao Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e97f018ff740f..66460c2074f69 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -97,7 +97,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static phys_addr_t __init early_pgtable_alloc(void) +static phys_addr_t __init early_pgtable_alloc(int shift) { phys_addr_t phys; void *ptr; @@ -174,7 +174,7 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; @@ -184,7 +184,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, if (pmd_none(pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(); + pte_phys = pgtable_alloc(PAGE_SHIFT); __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); pmd = READ_ONCE(*pmdp); } @@ -208,7 +208,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), int flags) + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; pmd_t *pmdp; @@ -246,7 +246,7 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), int flags) + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; pud_t pud = READ_ONCE(*pudp); @@ -258,7 +258,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, if (pud_none(pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(); + pmd_phys = pgtable_alloc(PMD_SHIFT); __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); pud = READ_ONCE(*pudp); } @@ -294,7 +294,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; @@ -304,7 +304,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, if (pgd_none(pgd)) { phys_addr_t pud_phys; BUG_ON(!pgtable_alloc); - pud_phys = pgtable_alloc(); + pud_phys = pgtable_alloc(PUD_SHIFT); __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE); pgd = READ_ONCE(*pgdp); } @@ -345,7 +345,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long addr, length, end, next; @@ -371,11 +371,23 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } -static phys_addr_t pgd_pgtable_alloc(void) +static phys_addr_t pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); - if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) - BUG(); + BUG_ON(!ptr); + + /* + * Call proper page table 
ctor in case later we need to + * call core mm functions like apply_to_page_range() on + * this pre-allocated page table. + * + * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is + * folded, and if so pgtable_pmd_page_ctor() becomes nop. + */ + if (shift == PAGE_SHIFT) + BUG_ON(!pgtable_page_ctor(virt_to_page(ptr))); + else if (shift == PMD_SHIFT) + BUG_ON(!pgtable_pmd_page_ctor(virt_to_page(ptr))); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); -- GitLab From 369aaab845a65b90ceac2801edd577fae442476c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 11 Mar 2019 18:57:47 -0600 Subject: [PATCH 0849/1861] arm64: mm: don't call page table ctors for init_mm init_mm doesn't require page table lock to be initialized at any level. Add a separate page table allocator for it, and the new one skips page table ctors. The ctors allocate memory when ALLOC_SPLIT_PTLOCKS is set. Not calling them avoids a memory leak in case we call pte_free_kernel() on init_mm. Acked-by: Mark Rutland Signed-off-by: Yu Zhao Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 66460c2074f69..83a221d2d24c3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -371,6 +371,16 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +static phys_addr_t pgd_kernel_pgtable_alloc(int shift) +{ + void *ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); +} + static phys_addr_t pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); @@ -595,7 +605,7 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, pgd_pgtable_alloc, 0); + prot, pgd_kernel_pgtable_alloc, 0); /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); @@ -1067,7 +1077,8 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, pgd_pgtable_alloc, flags); + size, PAGE_KERNEL, pgd_kernel_pgtable_alloc, + flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap, want_memblock); -- GitLab From 14b94d07572619af896c6d2d83b1196c4041fe19 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 12 Mar 2019 18:55:45 +0530 Subject: [PATCH 0850/1861] KVM: ARM: Remove pgtable page standard functions from stage-2 page tables ARM64 standard pgtable functions are going to use pgtable_page_[ctor|dtor] or pgtable_pmd_page_[ctor|dtor] constructs. At present KVM guest stage-2 PUD|PMD|PTE level page table pages are allocated with __get_free_page() via mmu_memory_cache_alloc() but released with standard pud|pmd_free() or pte_free_kernel(). These will fail once they start calling into pgtable_ [pmd]_page_dtor() for pages which never originally went through respective constructor functions. Hence convert all stage-2 page table page release functions to call the buddy allocator directly while freeing pages.
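Put differently, as a minimal sketch (illustrative, not the patch itself): stage-2 table pages never went through the pgtable ctors, so they must be released with a plain buddy free and never through the dtor-calling helpers:

    /* Hypothetical pairing for a ctor-less page table page: */
    pte_t *table = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
    /* ... use as a stage-2 table ... */
    free_page((unsigned long)table);        /* ok: no dtor involved */
    /* pte_free_kernel()/pmd_free() would be wrong here once those
     * helpers start calling pgtable_[pmd_]page_dtor(). */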
Reviewed-by: Suzuki K Poulose Acked-by: Yu Zhao Acked-by: Marc Zyngier Signed-off-by: Anshuman Khandual Signed-off-by: Will Deacon --- arch/arm/include/asm/stage2_pgtable.h | 4 ++-- arch/arm64/include/asm/stage2_pgtable.h | 4 ++-- virt/kvm/arm/mmu.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index 9e11dce55e06f..9587517649bd7 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -32,14 +32,14 @@ #define stage2_pgd_present(kvm, pgd) pgd_present(pgd) #define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud) #define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address) -#define stage2_pud_free(kvm, pud) pud_free(NULL, pud) +#define stage2_pud_free(kvm, pud) do { } while (0) #define stage2_pud_none(kvm, pud) pud_none(pud) #define stage2_pud_clear(kvm, pud) pud_clear(pud) #define stage2_pud_present(kvm, pud) pud_present(pud) #define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd) #define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address) -#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd) +#define stage2_pmd_free(kvm, pmd) free_page((unsigned long)pmd) #define stage2_pud_huge(kvm, pud) pud_huge(pud) diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 5412fa40825e8..915809e4ac32d 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -119,7 +119,7 @@ static inline pud_t *stage2_pud_offset(struct kvm *kvm, static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud) { if (kvm_stage2_has_pud(kvm)) - pud_free(NULL, pud); + free_page((unsigned long)pud); } static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp) @@ -192,7 +192,7 @@ static inline pmd_t *stage2_pmd_offset(struct kvm *kvm, static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd) { if (kvm_stage2_has_pmd(kvm)) - pmd_free(NULL, pmd); + free_page((unsigned long)pmd); } static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 27c958306449f..ad90ea3e5558a 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -189,7 +189,7 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); + free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } -- GitLab From 54c8d9119ec85de48fd0707946a5e57df9ad7acf Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 11 Mar 2019 18:57:49 -0600 Subject: [PATCH 0851/1861] arm64: mm: enable per pmd page table lock Switch from a per mm_struct to a per pmd page table lock by enabling ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for large systems. It is not clear whether there is contention on mm->page_table_lock, but given that the option comes at no cost (apart from initializing more spin locks), enable it now. We only do so when pmd is not folded, so we don't mistakenly call pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc.
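For illustration only (not part of the patch; the function name is hypothetical), with ARCH_ENABLE_SPLIT_PMD_PTLOCK enabled, writers serialize on a per-pmd-page lock through pmd_lock() rather than on the single mm->page_table_lock:

    static void update_pmd_example(struct mm_struct *mm, pmd_t *pmd)
    {
            spinlock_t *ptl = pmd_lock(mm, pmd); /* per-page when split */

            /* ... update the pmd entry ... */
            spin_unlock(ptl);
    }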
Signed-off-by: Yu Zhao Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 +++ arch/arm64/include/asm/pgalloc.h | 12 +++++++++++- arch/arm64/include/asm/tlb.h | 5 ++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de1..555af50355920 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -889,6 +889,9 @@ config ARCH_WANT_HUGE_PMD_SHARE config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y if PGTABLE_LEVELS > 2 + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 52fa47c73bf04..dabba4b2c61f1 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -33,12 +33,22 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page; + + page = alloc_page(PGALLOC_GFP); + if (!page) + return NULL; + if (!pgtable_pmd_page_ctor(page)) { + __free_page(page); + return NULL; + } + return page_address(page); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp) { BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1)); + pgtable_pmd_page_dtor(virt_to_page(pmdp)); free_page((unsigned long)pmdp); } diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 106fdc951b6ee..4e3becfed3877 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -62,7 +62,10 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - tlb_remove_table(tlb, virt_to_page(pmdp)); + struct page *page = virt_to_page(pmdp); + + pgtable_pmd_page_dtor(page); + tlb_remove_table(tlb, page); } #endif -- GitLab From 475ba3fc194b6429eebcca0e1cf9917de6fa173e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 11:23:48 +0100 Subject: [PATCH 0852/1861] arm64: mm: Consolidate early page table allocation The logic for early allocation of page tables is duplicated between pgd_kernel_pgtable_alloc() and pgd_pgtable_alloc(). Drop the duplication by calling one from the other and renaming pgd_kernel_pgtable_alloc() accordingly. Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 83a221d2d24c3..ef82312860ac3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -371,7 +371,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } -static phys_addr_t pgd_kernel_pgtable_alloc(int shift) +static phys_addr_t __pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); @@ -383,8 +383,7 @@ static phys_addr_t pgd_kernel_pgtable_alloc(int shift) static phys_addr_t pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(PGALLOC_GFP); - BUG_ON(!ptr); + phys_addr_t pa = __pgd_pgtable_alloc(shift); /* * Call proper page table ctor in case later we need to @@ -395,13 +394,11 @@ static phys_addr_t pgd_pgtable_alloc(int shift) * folded, and if so pgtable_pmd_page_ctor() becomes nop. 
*/ if (shift == PAGE_SHIFT) - BUG_ON(!pgtable_page_ctor(virt_to_page(ptr))); + BUG_ON(!pgtable_page_ctor(phys_to_page(pa))); else if (shift == PMD_SHIFT) - BUG_ON(!pgtable_pmd_page_ctor(virt_to_page(ptr))); + BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa))); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); - return __pa(ptr); + return pa; } /* @@ -605,7 +602,7 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, pgd_kernel_pgtable_alloc, 0); + prot, __pgd_pgtable_alloc, 0); /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); @@ -1077,8 +1074,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, pgd_kernel_pgtable_alloc, - flags); + size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap, want_memblock); -- GitLab From 6fda41bf12615ee7c3ddac88155099b1a8cf8d00 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 8 Apr 2019 18:17:18 +0100 Subject: [PATCH 0853/1861] arm64: Clear OSDLR_EL1 on CPU boot Some firmwares may reboot CPUs with OS Double Lock set. Make sure that it is unlocked, in order to use debug exceptions. Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 800486cc48239..555b6bd2f3d68 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,6 +135,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors); */ static int clear_os_lock(unsigned int cpu) { + write_sysreg(0, osdlr_el1); write_sysreg(0, oslar_el1); isb(); return 0; -- GitLab From 827a108e354db633698f0b4a10c1ffd2b1f8d1d0 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 8 Apr 2019 18:17:19 +0100 Subject: [PATCH 0854/1861] arm64: Save and restore OSDLR_EL1 across suspend/resume When the CPU comes out of suspend, the firmware may have modified the OS Double Lock Register. Save it in an unused slot of cpu_suspend_ctx, and restore it on resume. 
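A rough C-level equivalent of the assembly change below (illustrative only; the real code lives in proc.S and the helper names here are hypothetical):

    static u64 saved_osdlr;

    static void debug_suspend_example(void)
    {
            saved_osdlr = read_sysreg(osdlr_el1);
    }

    static void debug_resume_example(void)
    {
            write_sysreg(saved_osdlr, osdlr_el1);
    }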
Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index aa0817c9c4c36..fdd626d34274f 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -65,24 +65,25 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, cpacr_el1 - mrs x6, tcr_el1 - mrs x7, vbar_el1 - mrs x8, mdscr_el1 - mrs x9, oslsr_el1 - mrs x10, sctlr_el1 + mrs x5, osdlr_el1 + mrs x6, cpacr_el1 + mrs x7, tcr_el1 + mrs x8, vbar_el1 + mrs x9, mdscr_el1 + mrs x10, oslsr_el1 + mrs x11, sctlr_el1 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x11, tpidr_el1 + mrs x12, tpidr_el1 alternative_else - mrs x11, tpidr_el2 + mrs x12, tpidr_el2 alternative_endif - mrs x12, sp_el0 + mrs x13, sp_el0 stp x2, x3, [x0] - stp x4, xzr, [x0, #16] - stp x5, x6, [x0, #32] - stp x7, x8, [x0, #48] - stp x9, x10, [x0, #64] - stp x11, x12, [x0, #80] + stp x4, x5, [x0, #16] + stp x6, x7, [x0, #32] + stp x8, x9, [x0, #48] + stp x10, x11, [x0, #64] + stp x12, x13, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -105,8 +106,8 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 /* Don't change t0sz here, mask those bits when restoring */ - mrs x5, tcr_el1 - bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + mrs x7, tcr_el1 + bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH msr tcr_el1, x8 msr vbar_el1, x9 @@ -130,6 +131,7 @@ alternative_endif /* * Restore oslsr_el1 by writing oslar_el1 */ + msr osdlr_el1, x5 ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 -- GitLab From 78ad2341521d5ea96cb936244ed4c4c4ef9ec13b Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Sat, 9 Feb 2019 15:01:38 +0100 Subject: [PATCH 0855/1861] mac80211: Honor SW_CRYPTO_CONTROL for unicast keys in AP VLAN mode Restore SW_CRYPTO_CONTROL operation on AP_VLAN interfaces for unicast keys, the original override was intended to be done for group keys as those are treated specially by mac80211 and would always have been rejected. Now the situation is that AP_VLAN support must be enabled by the driver if it can support it (meaning it can support software crypto GTK TX). Thus, also simplify the code - if we get here with AP_VLAN and non- pairwise key, software crypto must be used (driver doesn't know about the interface) and can be used (driver must've advertised AP_VLAN if it also uses SW_CRYPTO_CONTROL). Fixes: db3bdcb9c3ff ("mac80211: allow AP_VLAN operation on crypto controlled devices") Signed-off-by: Alexander Wetzel [rewrite commit message] Signed-off-by: Johannes Berg --- net/mac80211/key.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 4700718e010f5..37e372896230a 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) * The driver doesn't know anything about VLAN interfaces. * Hence, don't send GTKs for VLAN interfaces to the driver. 
*/ - if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) + if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + ret = 1; goto out_unsupported; + } } ret = drv_set_key(key->local, SET_KEY, sdata, @@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) /* all of these we can do in software - if driver can */ if (ret == 1) return 0; - if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return 0; + if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) return -EINVAL; - } return 0; default: return -EINVAL; -- GitLab From bee58fe346750d53e3cf141554eea6164c8c748a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Jan 2019 14:52:05 -0800 Subject: [PATCH 0856/1861] net/ipv4/netfilter: Update comment from call_rcu_bh() to call_rcu() The RCU flavors have been consolidated, so this commit replaces a comment's mention of call_rcu_bh() with call_rcu(). Signed-off-by: Paul E. McKenney Cc: Florian Westphal Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Cc: Cc: Acked-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 835d50b279f56..a2a88ab07f7be 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -56,7 +56,7 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ - struct rcu_head rcu; /* for call_rcu_bh */ + struct rcu_head rcu; /* for call_rcu */ struct net *net; /* netns for pernet list */ char ifname[IFNAMSIZ]; /* device ifname */ }; -- GitLab From 1b937e8faa87ccfb4b7d5b230796fa67bc8a183b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 12 Mar 2019 22:08:12 +0000 Subject: [PATCH 0857/1861] RISC-V: Add separate defconfig for 32bit systems This patch adds rv32_defconfig for 32bit systems. The only difference between rv32_defconfig and defconfig is that rv32_defconfig has CONFIG_ARCH_RV32I=y. 
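For reference, a defconfig like this is typically selected by running something along the lines of "make ARCH=riscv rv32_defconfig" before a regular build, with a riscv32 cross toolchain chosen via CROSS_COMPILE; the exact toolchain triple depends on the environment.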
Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/rv32_defconfig | 84 +++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 arch/riscv/configs/rv32_defconfig diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig new file mode 100644 index 0000000000000..1a911ed8e7728 --- /dev/null +++ b/arch/riscv/configs/rv32_defconfig @@ -0,0 +1,84 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y +CONFIG_ARCH_RV32I=y +CONFIG_SMP=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NETLINK_DIAG=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCIE_XILINX=y +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_MACB=y +CONFIG_E1000E=y +CONFIG_R8169=y +CONFIG_MICROSEMI_PHY=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_HVC_RISCV_SBI=y +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_PLATFORM=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_UAS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_SIFIVE_PLIC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_PRINTK_TIME=y +# CONFIG_RCU_TRACE is not set -- GitLab From 31634bf5dcc418b5b2cacd954394c0c4620db6a2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 22:09:05 -0700 Subject: [PATCH 0858/1861] net/mlx5: FPGA, tls, hold rcu read lock a bit longer To avoid use-after-free, hold the rcu read lock until we are done copying flow data into the command buffer. 
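The rule the fix enforces, as a minimal sketch (the copy helper name is hypothetical): every dereference of an object found via idr_find() under RCU must stay inside the read-side critical section, because the object may be freed as soon as that section ends:

    rcu_read_lock();
    flow = idr_find(&idr, id);
    if (flow)
            copy_flow_to_cmd(flow, cmd);    /* still protected here */
    rcu_read_unlock();
    /* 'flow' must not be dereferenced past this point */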
Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Reported-by: Eric Dumazet Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fpga/tls.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 8de64e88c670c..08aa7266c8c06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -217,22 +217,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, void *cmd; int ret; - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); - - if (!flow) { - WARN_ONCE(1, "Received NULL pointer for handle\n"); - return -EINVAL; - } - buf = kzalloc(size, GFP_ATOMIC); if (!buf) return -ENOMEM; cmd = (buf + 1); + rcu_read_lock(); + flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); -- GitLab From df3a8344d404a810b4aadbf19b08c8232fbaa715 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Mar 2019 01:05:41 -0700 Subject: [PATCH 0859/1861] net/mlx5: FPGA, tls, idr remove on flow delete Flow is kfreed on mlx5_fpga_tls_del_flow but kept in the idr data structure, this is risky and can cause use-after-free, since the idr_remove is delayed until tls_send_teardown_cmd completion. Instead of delaying idr_remove, in this patch we do it on mlx5_fpga_tls_del_flow, before actually kfree(flow). 
Added synchronize_rcu before kfree(flow) Fixes: ab412e1dd7db ("net/mlx5: Accel, add TLS rx offload routines") Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/fpga/tls.c | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 08aa7266c8c06..22a2ef1115144 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, return ret; } -static void mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) { unsigned long flags; + void *ptr; spin_lock_irqsave(idr_spinlock, flags); - idr_remove(idr, swid); + ptr = idr_remove(idr, swid); spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; } static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, @@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, kfree(buf); } -struct mlx5_teardown_stream_context { - struct mlx5_fpga_tls_command_context cmd; - u32 swid; -}; - static void mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, struct mlx5_fpga_tls_command_context *cmd, struct mlx5_fpga_dma_buf *resp) { - struct mlx5_teardown_stream_context *ctx = - container_of(cmd, struct mlx5_teardown_stream_context, cmd); - if (resp) { u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); @@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, mlx5_fpga_err(fdev, "Teardown stream failed with syndrome = %d", syndrome); - else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) - mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, - &fdev->tls->tx_idr_spinlock, - ctx->swid); - else - mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, - &fdev->tls->rx_idr_spinlock, - ctx->swid); } mlx5_fpga_tls_put_command_ctx(cmd); } @@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, u32 swid, gfp_t flags) { - struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_tls_command_context *ctx; struct mlx5_fpga_dma_buf *buf; void *cmd; @@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, if (!ctx) return; - buf = &ctx->cmd.buf; + buf = &ctx->buf; cmd = (ctx + 1); MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); MLX5_SET(tls_cmd, cmd, swid, swid); @@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, buf->sg[0].data = cmd; buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - ctx->swid = swid; - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, mlx5_fpga_tls_teardown_completion); } @@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, struct mlx5_fpga_tls *tls = mdev->fpga->tls; void *flow; - rcu_read_lock(); if (direction_sx) - flow = idr_find(&tls->tx_idr, swid); + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); else - flow = idr_find(&tls->rx_idr, swid); - - rcu_read_unlock(); + flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); if (!flow) { mlx5_fpga_err(mdev->fpga, "No flow information for 
swid %u\n", @@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, return; } + synchronize_rcu(); /* before kfree(flow) */ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); } -- GitLab From 192fba79822d9612af5ccd3f8aa05c922640ee13 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 28 Mar 2019 10:00:35 +0200 Subject: [PATCH 0860/1861] net/mlx5e: Skip unneeded tx recover if interface state is down Skip the recover operation if the interface is in the down state, as TX objects are not open. This fixes a bug where the recover flow re-opened TX objects which were not opened before, leading to a possible memory leak at driver unload. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 9d38e62cdf248..a85843e4e925d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -186,12 +186,18 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) { - int err; + int err = 0; rtnl_lock(); mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + mlx5e_close_locked(priv->netdev); err = mlx5e_open_locked(priv->netdev); + +out: mutex_unlock(&priv->state_lock); rtnl_unlock(); -- GitLab From 484c1ada0bd2bdcb76f849ae77983e24320a2d1d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 28 Mar 2019 14:26:47 +0200 Subject: [PATCH 0861/1861] net/mlx5e: Use fail-safe channels reopen in tx reporter recover When requested to recover from an error, the tx reporter might open new channels and close the existing ones. Use the safe channels switch flow in order to guarantee opened channels at the end of the recover flow. For this purpose, define a mlx5e_safe_reopen_channels function and use it within those flows.
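The difference between the two recover flows, sketched for illustration:

    /* before: on failure the netdev is left with no open channels
     *     mlx5e_close_locked(netdev);
     *     err = mlx5e_open_locked(netdev);
     *
     * after: mlx5e_safe_switch_channels() first opens a new,
     * identically configured channel set and only then releases the
     * old one, so a failure leaves the old channels in place.
     */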
Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 71c65cc179048..d3eaf2ceaa397 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -858,6 +858,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs); * switching channels */ typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_hw_modify hw_modify); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index a85843e4e925d..476dd97f7f2f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -194,8 +194,7 @@ static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto out; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_safe_reopen_channels(priv); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5fdbd3190d9f..002e2adb37224 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2937,6 +2937,14 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, return 0; } +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; @@ -4161,11 +4169,10 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) if (!report_failed) goto unlock; - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_safe_reopen_channels(priv); if (err) netdev_err(priv->netdev, - "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", err); unlock: -- GitLab From 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 21 Mar 2019 19:07:20 -0700 Subject: [PATCH 0862/1861] net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded XDP programs might change packets data contents which will make the reported skb checksum (checksum complete) invalid. When XDP programs are loaded/unloaded set/clear rx RQs MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag. 
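The invariant behind this, sketched for illustration (hw_csum is a hypothetical placeholder): CHECKSUM_COMPLETE caches a checksum of the bytes as they were received, so an in-place rewrite by an XDP program silently invalidates it:

    skb->ip_summed = CHECKSUM_COMPLETE;
    skb->csum = hw_csum;        /* checksum of the original bytes */
    /* ... XDP program rewrites packet contents in place ... */
    /* skb->csum is now stale, hence the driver sets
     * MLX5E_RQ_STATE_NO_CSUM_COMPLETE whenever an XDP prog is loaded. */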
Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5efce4a3ff796..76a3d01a489e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1768,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0; for (i = 0; i < channels->num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 002e2adb37224..4187c43b20ada 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -951,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3dde5c7e0739a..91af48344743b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -752,7 +752,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet -- GitLab From 0aa1d18615c163f92935b806dcaff9157645233a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 12 Mar 2019 00:24:52 -0700 Subject: [PATCH 0863/1861] net/mlx5e: Rx, Fixup skb checksum for packets with tail padding When an ethernet frame with ip payload is padded, the padding octets are not covered by the hardware checksum. Prior to the cited commit, the skb checksum was forced to be CHECKSUM_NONE when padding was detected. After it, the kernel will try to trim the padding bytes and subtract their checksum from skb->csum. In this patch we fix up skb->csum for any ip packet with tail padding of any size, if any padding is found. The FCS case is just one special case of this general-purpose patch; hence, it is removed.
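The checksum arithmetic used by the fixup, in isolation (illustrative; pad, pad_len and pad_offset are hypothetical names): csum_block_add() folds a sub-block's checksum into a running checksum while accounting for the block's byte offset within the packet:

    __wsum pad_csum = csum_partial(pad, pad_len, 0);

    skb->csum = csum_block_add(skb->csum, pad_csum, pad_offset);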
Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), Cc: Eric Dumazet Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 79 +++++++++++++++---- .../ethernet/mellanox/mlx5/core/en_stats.c | 6 ++ .../ethernet/mellanox/mlx5/core/en_stats.h | 4 + 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 91af48344743b..0e254de23f3d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -712,17 +712,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static u32 mlx5e_get_fcs(const struct sk_buff *skb) -{ - const void *fcs_bytes; - u32 _fcs_bytes; - - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); - - return __get_unaligned_cpu32(fcs_bytes); -} - static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { void *ip_p = skb->data + network_depth; @@ -733,6 +722,68 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -781,10 +832,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1a78e05cbba81..b75aa8b8bf04e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -59,6 +59,8 @@ static 
const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -151,6 +153,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -1190,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 4640d4f986f8c..16c3b785f282b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -181,6 +183,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; -- GitLab From 0318a7b7fcad9765931146efa7ca3a034194737c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 25 Mar 2019 22:10:59 -0700 Subject: [PATCH 0864/1861] net/mlx5e: Rx, Check ip headers sanity In the two places is_last_ethertype_ip is being called, the caller will be looking inside the ip header, to be safe, add ip{4,6} header sanity check. And return true only on valid ip headers, i.e: the whole header is contained in the linear part of the skb. Note: Such situation is very rare and hard to reproduce, since mlx5e allocates a large enough headroom to contain the largest header one can imagine. 
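The guard pattern being added, in isolation (illustrative sketch): pskb_may_pull() guarantees that at least the requested number of bytes sit in the linear area before any header field is dereferenced:

    if (!pskb_may_pull(skb, network_depth + sizeof(struct iphdr)))
            return false;   /* header not linear; treat as non-IP */

    ip4 = (struct iphdr *)(skb->data + network_depth);
    /* safe: the full IPv4 header is now readable */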
Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") Reported-by: Cong Wang Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0e254de23f3d7..36fd628188bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -692,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; } static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) -- GitLab From 5e0060b1491b299b1706414e61ede0b02265680e Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Fri, 29 Mar 2019 12:50:37 +0000 Subject: [PATCH 0865/1861] net/mlx5e: Protect against non-uplink representor for encap TC encap offload is supported only for the physical uplink representor. Fail for a non-uplink representor. Fixes: 3e621b19b0bb ("net/mlx5e: Support TC encapsulation offloads with upper devices") Signed-off-by: Dmytro Linkin Reviewed-by: Eli Britstein Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index fa2a3c444cdc6..eec07b34b4ad0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + return 0; } -- GitLab From 8c8811d46d00d119ffbe039a6e52a0b504df1c2c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 31 Mar 2019 12:53:03 +0000 Subject: [PATCH 0866/1861] Revert "net/mlx5e: Enable reporting checksum unnecessary also for L3 packets" This reverts commit b820e6fb0978f9c2ac438c199d2bb2f35950e9c9. Prior to the commit we are reverting, checksum unnecessary was only set when both the L3 OK and L4 OK bits are set on the CQE. This caused packets of IP protocols such as SCTP, which are not dealt with by the current HW L4 parser (hence the L4 OK bit is not set, but the L4 header type none bit is set), to go through the checksum none code, where currently we wrongly report checksum unnecessary for them, which is a regression. Fix this by a revert. Note that on our usual track we report checksum complete, so the revert isn't expected to have any notable performance impact. Also, when we are not on the checksum complete track, the L4 protocols for which we report checksum none are not high-performance ones; we will still report checksum unnecessary for UDP/TCP.
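The CQE predicate before and after the revert, side by side (illustrative):

    /* reverted (buggy for SCTP):
     *     L3_OK && (L4_OK || l4_hdr_type == NONE) -> CHECKSUM_UNNECESSARY
     * restored:
     *     L3_OK && L4_OK                          -> CHECKSUM_UNNECESSARY
     *
     * SCTP is not parsed by the HW L4 checker (L4_OK clear, L4 header
     * type NONE), so it now falls through to the checksum-none path
     * instead of being wrongly reported as checksum unnecessary.
     */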
Fixes: b820e6fb0978 ("net/mlx5e: Enable reporting checksum unnecessary also for L3 packets") Signed-off-by: Or Gerlitz Reported-by: Avi Urman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 36fd628188bc0..c3b3002ff62f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -847,8 +847,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - ((cqe->hds_ip_ext & CQE_L4_OK) || - (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { + (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; -- GitLab From 7ee2ace9c544a0886e02b54b625e521df8692d20 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 31 Aug 2018 14:29:16 +0300 Subject: [PATCH 0867/1861] net/mlx5e: Switch to Toeplitz RSS hash by default Although the XOR hash function can perform very well in some special use cases, to align with all other drivers, the mlx5 driver should use Toeplitz hash by default. Toeplitz is more stable for the general use case and it is more standard and reliable. On top of that, XOR (MLX5_RX_HASH_FN_INVERTED_XOR8) gives only a repeated 8-bit pattern; when used for UDP tunneling RSS source port manipulation, it results in a fixed source port, which causes bad RSS spread. Fixes: 2be6967cdbc9 ("net/mlx5e: Support ETH_RSS_HASH_XOR") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Tariq Toukan Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4187c43b20ada..f7eb521db5800 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4564,7 +4564,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, { enum mlx5e_traffic_types tt; - rss_params->hfunc = ETH_RSS_HASH_XOR; + rss_params->hfunc = ETH_RSS_HASH_TOP; netdev_rss_key_fill(rss_params->toeplitz_hash_key, sizeof(rss_params->toeplitz_hash_key)); mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, -- GitLab From 49a27e279052cf71ba931a26b00194ff46510480 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:45 +0100 Subject: [PATCH 0868/1861] PM / Domains: Add generic data pointer to struct genpd_power_state Add a data pointer to the genpd_power_state struct, to allow a genpd backend driver to store per-state specific data. To introduce the pointer, change the way genpd deals with freeing of the corresponding allocated data. More precisely, clarify who is responsible for freeing the data, by adding a ->free_states() callback to the generic_pm_domain structure. The one allocating the data will be expected to set the callback, to allow genpd to invoke it from genpd_remove(). Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 12 ++++++++++-- include/linux/pm_domain.h | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c8..ff6f992f7a1d1 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1686,6 +1686,12 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static void genpd_free_default_power_state(struct genpd_power_state *states, + unsigned int state_count) +{ + kfree(states); +} + static int genpd_set_default_power_state(struct generic_pm_domain *genpd) { struct genpd_power_state *state; @@ -1696,7 +1702,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) genpd->states = state; genpd->state_count = 1; - genpd->free = state; + genpd->free_states = genpd_free_default_power_state; return 0; } @@ -1812,7 +1818,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); - kfree(genpd->free); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ed5874bcee09..8e1399231753c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -69,6 +69,7 @@ struct genpd_power_state { s64 residency_ns; struct fwnode_handle *fwnode; ktime_t idle_time; + void *data; }; struct genpd_lock_ops; @@ -110,9 +111,10 @@ struct generic_pm_domain { struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; + void (*free_states)(struct genpd_power_state *states, + unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - void *free; /* Free the state that was allocated for default */ ktime_t on_time; ktime_t accounting_time; const struct genpd_lock_ops *lock_ops; -- GitLab From eb594b7325f61835555140922a4cb715264a325c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:46 +0100 Subject: [PATCH 0869/1861] PM / Domains: Add support for CPU devices to genpd To enable a CPU device to be attached to a PM domain managed by genpd, make a few changes to it for convenience. To be able to quickly find out what CPUs are attached to a genpd, which typically becomes useful from a genpd governor as subsequent changes are about to show, add a cpumask to struct generic_pm_domain to be updated when a CPU device gets attached to the genpd containing that cpumask. Also, propagate the cpumask changes upwards in the domain hierarchy to the master PM domains. This way, the cpumask for a genpd hierarchically reflects all CPUs attached to the topology below it. Finally, make this an opt-in feature, to avoid having to manage CPUs and the cpumask for a genpd that don't need it. To that end, add a new genpd configuration bit, GENPD_FLAG_CPU_DOMAIN. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 65 ++++++++++++++++++++++++++++++++++++- include/linux/pm_domain.h | 13 ++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ff6f992f7a1d1..ecac03dcc9b26 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" @@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) +#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -1454,6 +1456,56 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } +static void __genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) +{ + struct gpd_link *link; + + if (!genpd_is_cpu_domain(genpd)) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + struct generic_pm_domain *master = link->master; + + genpd_lock_nested(master, depth + 1); + __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_unlock(master); + } + + if (set) + cpumask_set_cpu(cpu, genpd->cpus); + else + cpumask_clear_cpu(cpu, genpd->cpus); +} + +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + struct device *dev, bool set) +{ + int cpu; + + if (!genpd_is_cpu_domain(genpd)) + return; + + for_each_possible_cpu(cpu) { + if (get_cpu_device(cpu) == dev) { + __genpd_update_cpumask(genpd, cpu, set, 0); + return; + } + } +} + +static void genpd_set_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, true); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, false); +} + static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { @@ -1475,6 +1527,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, genpd_lock(genpd); + genpd_set_cpumask(genpd, dev); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1532,6 +1585,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; + genpd_clear_cpumask(genpd, dev); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -1768,11 +1822,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) return -EINVAL; + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); - if (ret) + if (ret) { + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); return ret; + } } else if (!gov && genpd->state_count > 1) { pr_warn("%s: no governor for states\n", genpd->name); } @@ -1818,6 +1879,8 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); if (genpd->free_states) 
genpd->free_states(genpd->states, genpd->state_count); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8e1399231753c..a6e251fe9deb7 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * Flags to control the behaviour of a genpd. @@ -42,11 +43,22 @@ * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered * on, in case any of its attached devices is used * in the wakeup path to serve system wakeups. + * + * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get + * devices attached, which may belong to CPUs or + * possibly have subdomains with CPUs attached. + * This flag enables the genpd backend driver to + * deploy idle power management support for CPUs + * and groups of CPUs. Note that, the backend + * driver must then comply with the so called, + * last-man-standing algorithm, for the CPUs in the + * PM domain. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) +#define GENPD_FLAG_CPU_DOMAIN (1U << 4) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -94,6 +106,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ + cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct opp_table *opp_table; /* OPP table of the genpd */ -- GitLab From 6f9b83ac877fb5558d76b9f78590f3afd1bdf421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:47 +0100 Subject: [PATCH 0870/1861] cpuidle: Export the next timer expiration for CPUs To be able to predict the sleep duration for a CPU entering idle, it is essential to know the expiration time of the next timer. Both the teo and the menu cpuidle governors already use this information for CPU idle state selection. Moving forward, a similar prediction needs to be made for a group of idle CPUs rather than for a single one and the following changes implement a new genpd governor for that purpose. In order to support that feature, add a new function called tick_nohz_get_next_hrtimer() that will return the next hrtimer expiration time of a given CPU to be invoked after deciding whether or not to stop the scheduler tick on that CPU. Make the cpuidle core call tick_nohz_get_next_hrtimer() right before invoking the ->enter() callback provided by the cpuidle driver for the given state and store its return value in the per-CPU struct cpuidle_device, so as to make it available to code outside of cpuidle. Note that at the point when cpuidle calls tick_nohz_get_next_hrtimer(), the governor's ->select() callback has already returned and indicated whether or not the tick should be stopped, so in fact the value returned by tick_nohz_get_next_hrtimer() always is the next hrtimer expiration time for the given CPU, possibly including the tick (if it hasn't been stopped). Co-developed-by: Lina Iyer Co-developed-by: Daniel Lezcano Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/cpuidle.c | 19 +++++++++++++++++-- include/linux/cpuidle.h | 1 + include/linux/tick.h | 7 ++++++- kernel/time/tick-sched.c | 12 ++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871e..0f4b7c45df3e1 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either next tick or the next + * timer event, whatever expires first. Additionally, to make this data + * useful for consumers outside cpuidle, we rely on that the governor's + * ->select() callback have decided, whether to stop the tick or not. + */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a31..bb9a0db89f1ab 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4d..8891b5ac3e403 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0be..8d18e03124ff4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1022,6 +1022,18 @@ bool tick_nohz_idle_got_tick(void) return false; } +/** + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer + * or the tick, whatever that expires first. Note that, if the tick has been + * stopped, it returns the next hrtimer. 
+ * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped -- GitLab From dbe7208c6c4aec083571f2ec742870a0d0edbea3 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 7 Apr 2019 11:12:48 -0700 Subject: [PATCH 0871/1861] power: supply: cpcap-battery: Fix division by zero If called fast enough so samples do not increment, we can get division by zero in kernel: __div0 cpcap_battery_cc_raw_div cpcap_battery_get_property power_supply_get_property.part.1 power_supply_get_property power_supply_show_property power_supply_uevent Fixes: 874b2adbed12 ("power: supply: cpcap-battery: Add a battery driver") Signed-off-by: Tony Lindgren Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- drivers/power/supply/cpcap-battery.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c index 08d5037fd0521..6887870ba32c3 100644 --- a/drivers/power/supply/cpcap-battery.c +++ b/drivers/power/supply/cpcap-battery.c @@ -221,6 +221,9 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, int avg_current; u32 cc_lsb; + if (!divider) + return 0; + sample &= 0xffffff; /* 24-bits, unsigned */ offset &= 0x7ff; /* 10-bits, signed */ -- GitLab From 6ec4bae178d8a1e9814eb3bfdd321b0475de0468 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 3 Apr 2019 17:38:20 -0700 Subject: [PATCH 0872/1861] dt-bindings: clock: sifive: add FU540-C000 PRCI clock constants Add preprocessor macros for the important PRCI output clocks that are needed by both the FU540 PRCI driver and DT data. Details are available in the FU540 manual in Chapter 7 of https://static.dev.sifive.com/FU540-C000-v1.0.pdf Signed-off-by: Paul Walmsley Reviewed-by: Rob Herring Signed-off-by: Palmer Dabbelt --- include/dt-bindings/clock/sifive-fu540-prci.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/dt-bindings/clock/sifive-fu540-prci.h diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h new file mode 100644 index 0000000000000..6a0b70a37d785 --- /dev/null +++ b/include/dt-bindings/clock/sifive-fu540-prci.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018-2019 SiFive, Inc. + * Wesley Terpstra + * Paul Walmsley + */ + +#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H +#define __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H + +/* Clock indexes for use by Device Tree data and the PRCI driver */ + +#define PRCI_CLK_COREPLL 0 +#define PRCI_CLK_DDRPLL 1 +#define PRCI_CLK_GEMGXLPLL 2 +#define PRCI_CLK_TLCLK 3 + +#endif -- GitLab From 771acc7e4a6e5dba779cb1a7fd851a164bc81033 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 9 Apr 2019 11:49:17 -0700 Subject: [PATCH 0873/1861] Bluetooth: btusb: request wake pin with NOAUTOEN Badly-designed systems might have (for example) active-high wake pins that default to high (e.g., because of external pull ups) until they have an active firmware which starts driving it low. This can cause an interrupt storm in the time between request_irq() and disable_irq(). We don't support shared interrupts here, so let's just pre-configure the interrupt to avoid auto-enabling it. 
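The fix is a reusable two-step pattern for any wake IRQ that may be noisy until firmware is running: mark the line IRQ_NOAUTOEN before requesting it, then enable it explicitly once the pin is known to sit at its idle level. A minimal sketch of that pattern, assuming a hypothetical platform driver (pdev, priv and wake_irq_handler are illustrative names, not taken from this patch; needs <linux/interrupt.h> and <linux/irq.h>):

	/* Pre-configure the IRQ so devm_request_irq() does not enable it. */
	irq_set_status_flags(irq, IRQ_NOAUTOEN);

	ret = devm_request_irq(&pdev->dev, irq, wake_irq_handler, 0,
			       "hypothetical-wake", priv);
	if (ret)
		return ret;

	/*
	 * ...once firmware is up and drives the pin to its idle level,
	 * the storm window is gone and the IRQ can go live:
	 */
	enable_irq(irq);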
Fixes: fd913ef7ce61 ("Bluetooth: btusb: Add out-of-band wakeup support") Fixes: 5364a0b4f4be ("arm64: dts: rockchip: move QCA6174A wakeup pin into its USB node") Signed-off-by: Brian Norris Reviewed-by: Matthias Kaehlcke Signed-off-by: Linus Torvalds --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index ded198328f216..7db48ae65cd2d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2942,6 +2942,7 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) return 0; } + irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_irq(&hdev->dev, irq, btusb_oob_wake_handler, 0, "OOB Wake-on-BT", data); if (ret) { @@ -2956,7 +2957,6 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) } data->oob_wake_irq = irq; - disable_irq(irq); bt_dev_info(hdev, "OOB Wake-on-BT configured at IRQ %u", irq); return 0; } -- GitLab From cf7cf6977f531acd5dfe55250d0ee8cbbb6f1ae8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Apr 2019 15:43:11 +1000 Subject: [PATCH 0874/1861] powerpc/mm: Define MAX_PHYSMEM_BITS for all 64-bit configs The recent commit 8bc086899816 ("powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations") removed our definition of MAX_PHYSMEM_BITS when SPARSEMEM is disabled. This inadvertently broke some 64-bit FLATMEM using configs with eg: arch/powerpc/include/asm/book3s/64/mmu-hash.h:584:6: error: "MAX_PHYSMEM_BITS" is not defined, evaluates to 0 #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) ^~~~~~~~~~~~~~~~ Fix it by making sure we define MAX_PHYSMEM_BITS for all 64-bit configs regardless of SPARSEMEM. Fixes: 8bc086899816 ("powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations") Reported-by: Andreas Schwab Reported-by: Hugh Dickins Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 598cdcdd13553..8ddd4a91bdc1e 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void) #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ defined (CONFIG_PPC_64K_PAGES) #define MAX_PHYSMEM_BITS 51 -#elif defined(CONFIG_SPARSEMEM) +#elif defined(CONFIG_PPC64) #define MAX_PHYSMEM_BITS 46 #endif -- GitLab From 2f36bde0fc8f1ab79d54bd2caa7c1cf874fd2206 Mon Sep 17 00:00:00 2001 From: "Andrew-sh.Cheng" Date: Fri, 29 Mar 2019 14:46:10 +0800 Subject: [PATCH 0875/1861] OPP: Introduce dev_pm_opp_find_freq_ceil_by_volt() This patch introduces a new helper routine in the OPP core, which returns the OPP with the highest frequency which has voltage less than or equal to the target voltage passed to the helper. Signed-off-by: Andrew-sh.Cheng [ Viresh: Massaged the commit log and renamed the helper with some cleanups. ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 54 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 8 +++++++ 2 files changed, 62 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0420f7e8ad5b0..0e7703fe733fc 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -526,6 +526,60 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/** + * dev_pm_opp_find_freq_ceil_by_volt() - Find OPP with highest frequency for + * target voltage. 
+ * @dev: Device for which we do this operation. + * @u_volt: Target voltage. + * + * Search for OPP with highest (ceil) frequency and has voltage <= u_volt. + * + * Return: matching *opp, else returns ERR_PTR in case of error which should be + * handled using IS_ERR. + * + * Error return values can be: + * EINVAL: bad parameters + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + if (!dev || !u_volt) { + dev_err(dev, "%s: Invalid argument volt=%lu\n", __func__, + u_volt); + return ERR_PTR(-EINVAL); + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available) { + if (temp_opp->supplies[0].u_volt > u_volt) + break; + opp = temp_opp; + } + } + + /* Increment the reference count of OPP */ + if (!IS_ERR(opp)) + dev_pm_opp_get(opp); + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_by_volt); + static int _set_opp_voltage(struct device *dev, struct regulator *reg, struct dev_pm_opp_supply *supply) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 24c757a32a7b1..b150fe97ce5ae 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -102,6 +102,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt); struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); @@ -207,6 +209,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-ENOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + return ERR_PTR(-ENOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { -- GitLab From d8743230c9f4e92f370ecd2a90c680ddcede6ae5 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 9 Apr 2019 18:35:46 +0100 Subject: [PATCH 0876/1861] sched/topology: Fix build_sched_groups() comment The comment was introduced (pre 2.6.12) by: 8a7a2318dc07 ("[PATCH] sched: consolidate sched domains") and referred to sched_group->cpu_power. This was folded into sched_group->sched_group_power in commit 9c3f75cbd144 ("sched: Break out cpu_power from the sched_group structure") The comment was then updated in: ced549fa5fc1 ("sched: Remove remaining dubious usage of "power"") but should have replaced "sg->cpu_capacity" with "sg->sched_group_capacity". Do that now. 
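As a usage note for dev_pm_opp_find_freq_ceil_by_volt() from PATCH 0875 above: the caller passes a voltage cap and gets back the fastest OPP that fits under it, along with a reference it must drop. A hedged sketch (assumes dev already has a populated OPP table; the 950000 uV cap is an arbitrary example value, not from the patch):

	struct dev_pm_opp *opp;
	unsigned long freq;

	/* Highest available frequency whose voltage is <= 950000 uV. */
	opp = dev_pm_opp_find_freq_ceil_by_volt(dev, 950000);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	freq = dev_pm_opp_get_freq(opp);	/* frequency to program */
	dev_pm_opp_put(opp);			/* drop the helper's reference */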
Signed-off-by: Valentin Schneider Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: morten.rasmussen@arm.com Cc: qais.yousef@arm.com Link: http://lkml.kernel.org/r/20190409173546.4747-3-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 64bec54ded3e0..90e1a870fb0df 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1087,8 +1087,8 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd) /* * build_sched_groups will build a circular linked list of the groups - * covered by the given span, and will set each group's ->cpumask correctly, - * and ->cpu_capacity to 0. + * covered by the given span, will set each group's ->cpumask correctly, + * and will initialize their ->sgc. * * Assumes the sched_domain tree is fully constructed */ -- GitLab From 67d4f6ff2fb69e02bd6365a91ca3939b7a14deac Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 9 Apr 2019 18:35:45 +0100 Subject: [PATCH 0877/1861] sched/topology: Skip duplicate group rewrites in build_sched_groups() While staring at build_sched_domains(), I realized that get_group() does several duplicate (thus useless) writes. If you take the Arm Juno r0 (LITTLEs = [0, 3, 4, 5], bigs = [1, 2]), the sched_group build flow would look like this: ('MC[cpu]->sg' means 'per_cpu_ptr(&tl->data->sg, cpu)' with 'tl == MC') build_sched_groups(MC[CPU0]->sd, CPU0) get_group(0) -> MC[CPU0]->sg get_group(3) -> MC[CPU3]->sg get_group(4) -> MC[CPU4]->sg get_group(5) -> MC[CPU5]->sg build_sched_groups(DIE[CPU0]->sd, CPU0) get_group(0) -> DIE[CPU0]->sg get_group(1) -> DIE[CPU1]->sg <=================+ | build_sched_groups(MC[CPU1]->sd, CPU1) | get_group(1) -> MC[CPU1]->sg | get_group(2) -> MC[CPU2]->sg | | build_sched_groups(DIE[CPU1]->sd, CPU1) ^ get_group(1) -> DIE[CPU1]->sg } We've set up these two up here! get_group(3) -> DIE[CPU0]->sg } From this point on, we will only use sched_groups that have been previously visited & initialized. The only new operation will be which group pointer we affect to sd->groups. On the Juno r0 we get 32 get_group() calls, every single one of them writing to a sched_group->cpumask. However, all of the data structures we need are set up after 8 visits (see above). Return early from get_group() if we've already visited (and thus initialized) the sched_group we're looking at. Overlapping domains are not affected as they do not use build_sched_groups(). Tested on a Juno and a 2 * (Xeon E5-2690) system. ( FWIW I initially checked the refs for both sg && sg->sgc, but figured if they weren't both 0 or > 1 then something must have gone wrong, so I threw in a WARN_ON(). ) No change in functionality intended. 
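The reference count doubling as an "already initialized" flag is the heart of the change. A standalone sketch of the idiom, using a hypothetical structure rather than the scheduler types (needs <linux/atomic.h>):

	struct once_node {
		atomic_t ref;	/* reference count, also the visited flag */
		/* ...fields written exactly once, by the first visitor... */
	};

	static struct once_node *visit(struct once_node *n)
	{
		/* Take our reference; a result > 1 means someone beat us here. */
		if (atomic_inc_return(&n->ref) > 1)
			return n;	/* already initialized, skip the writes */

		/* First visit: safe to initialize n's fields here. */
		return n;
	}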
Signed-off-by: Valentin Schneider Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/topology.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 90e1a870fb0df..c65b31e9458be 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1059,6 +1059,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd) struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); struct sched_domain *child = sd->child; struct sched_group *sg; + bool already_visited; if (child) cpu = cpumask_first(sched_domain_span(child)); @@ -1066,9 +1067,14 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd) sg = *per_cpu_ptr(sdd->sg, cpu); sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); - /* For claim_allocations: */ - atomic_inc(&sg->ref); - atomic_inc(&sg->sgc->ref); + /* Increase refcounts for claim_allocations: */ + already_visited = atomic_inc_return(&sg->ref) > 1; + /* sgc visits should follow a similar trend as sg */ + WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1)); + + /* If we have already visited that group, it's already initialized. */ + if (already_visited) + return sg; if (child) { cpumask_copy(sched_group_span(sg), sched_domain_span(child)); -- GitLab From 547571b5abe61bb33c6005d8981e86e3c61fedcc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 27 Mar 2019 09:15:19 -0600 Subject: [PATCH 0878/1861] x86/asm: Modernize sync_bitops.h Add missing instruction suffixes and use rmwcc.h just like was (more or less) recently done for bitops.h as well, see: 22636f8c9511: x86/asm: Add instruction suffixes to bitops 288e4521f0f6: x86/asm: 'Simplify' GEN_*_RMWcc() macros No change in functionality intended. Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5C9B93870200007800222289@prv1-mh.provo.novell.com [ Cleaned up the changelog a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sync_bitops.h | 31 +++++++++--------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 2fe745356fb11..6d8d6bc183b74 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -14,6 +14,8 @@ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
*/ +#include <asm/rmwcc.h> + #define ADDR (*(volatile long *)addr) /** @@ -29,7 +31,7 @@ */ static inline void sync_set_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; bts %1,%0" + asm volatile("lock; " __ASM_SIZE(bts) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr) */ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btr %1,%0" + asm volatile("lock; " __ASM_SIZE(btr) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) */ static inline void sync_change_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; btc %1,%0" + asm volatile("lock; " __ASM_SIZE(btc) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) +static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; bts %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr); } /** @@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; btr %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr); } /** @@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; - - asm volatile("lock; btc %2,%1\n\tsetc %0" - : "=qm" (oldbit), "+m" (ADDR) - : "Ir" (nr) : "memory"); - return oldbit; + return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr); } #define sync_test_bit(nr, addr) test_bit(nr, addr) -- GitLab From 9df1e2c60d2b90024d5c2544f07ac9f539524322 Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Wed, 10 Apr 2019 11:59:52 +0800 Subject: [PATCH 0879/1861] cpufreq: boost: Remove CONFIG_CPU_FREQ_BOOST_SW Kconfig option Commit 2fb4719b2560 ("cpufreq / boost: Kconfig: Support for software-managed BOOST") added the CONFIG_CPU_FREQ_BOOST_SW config. However, the EXYNOS-based cpufreq drivers have since been removed in favor of the cpufreq-dt driver, which sets the boost attribute itself if required. So let's remove this option and update cpufreq_generic_attr[]. Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/Kconfig | 4 ---- drivers/cpufreq/freq_table.c | 3 --- 2 files changed, 7 deletions(-) diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b22e6bba71f15..4d2b33a30292a 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -26,10 +26,6 @@ config CPU_FREQ_GOV_COMMON select IRQ_WORK bool -config CPU_FREQ_BOOST_SW - bool - depends on THERMAL - config CPU_FREQ_STAT bool "CPU frequency transition statistics" help diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 3a8cc99e6815f..e7be0af3199f8 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -290,9 +290,6 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_boost_freqs); struct freq_attr *cpufreq_generic_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, -#ifdef CONFIG_CPU_FREQ_BOOST_SW - &cpufreq_freq_attr_scaling_boost_freqs, -#endif NULL, }; EXPORT_SYMBOL_GPL(cpufreq_generic_attr); -- GitLab From f2a424f6c613a98560dc49fd9984589401d51648 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Mar 2019 00:59:25 -0400 Subject: [PATCH 0880/1861] PM / core: Introduce dpm_async_fn() helper When we want to execute device pm functions asynchronously, we'll do the following for the device: 1) reinit_completion(&dev->power.completion); 2) Check if the device enables asynchronous suspend. 3) If necessary, execute the corresponding function asynchronously. There are a lot of such repeated operations here, in fact we can avoid this. So introduce dpm_async_fn() to have better code readability and reuse. And use this function to do some cleanup. Signed-off-by: Yangtao Li Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 62 +++++++++++++++------------------------ 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 41eba82ee7b9d..9b8c829798f19 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -706,6 +706,19 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } +static bool dpm_async_fn(struct device *dev, async_func_t func) +{ + reinit_completion(&dev->power.completion); + + if (is_async(dev)) { + get_device(dev); + async_schedule(func, dev); + return true; + } + + return false; +} + static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = (struct device *)data; @@ -732,13 +745,8 @@ void dpm_noirq_resume_devices(pm_message_t state) * in case the starting of async threads is * delayed by non-async resuming devices. */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume_noirq, dev); - } - } + list_for_each_entry(dev, &dpm_noirq_list, power.entry) + dpm_async_fn(dev, async_resume_noirq); while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); @@ -889,13 +897,8 @@ void dpm_resume_early(pm_message_t state) * in case the starting of async threads is * delayed by non-async resuming devices. 
*/ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume_early, dev); - } - } + list_for_each_entry(dev, &dpm_late_early_list, power.entry) + dpm_async_fn(dev, async_resume_early); while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); @@ -1053,13 +1056,8 @@ void dpm_resume(pm_message_t state) pm_transition = state; async_error = 0; - list_for_each_entry(dev, &dpm_suspended_list, power.entry) { - reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_resume, dev); - } - } + list_for_each_entry(dev, &dpm_suspended_list, power.entry) + dpm_async_fn(dev, async_resume); while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); @@ -1373,13 +1371,9 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie) static int device_suspend_noirq(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend_noirq, dev); + if (dpm_async_fn(dev, async_suspend_noirq)) return 0; - } + return __device_suspend_noirq(dev, pm_transition, false); } @@ -1576,13 +1570,8 @@ static void async_suspend_late(void *data, async_cookie_t cookie) static int device_suspend_late(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend_late, dev); + if (dpm_async_fn(dev, async_suspend_late)) return 0; - } return __device_suspend_late(dev, pm_transition, false); } @@ -1842,13 +1831,8 @@ static void async_suspend(void *data, async_cookie_t cookie) static int device_suspend(struct device *dev) { - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(async_suspend, dev); + if (dpm_async_fn(dev, async_suspend)) return 0; - } return __device_suspend(dev, pm_transition, false); } -- GitLab From a5881bea88616e3aacf521dbdbe0e323257aaba1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Apr 2019 10:02:19 +0200 Subject: [PATCH 0881/1861] x86/Kconfig: Remove the unused X86_DMA_REMAP Kconfig symbol Signed-off-by: Christoph Hellwig Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190410080220.21705-2-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c3..7f93e013e6dbf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -704,7 +704,6 @@ config STA2X11 depends on X86_32_NON_STANDARD && PCI select ARCH_HAS_PHYS_TO_DMA select X86_DEV_DMA_OPS - select X86_DMA_REMAP select SWIOTLB select MFD_STA2X11 select GPIOLIB @@ -2886,10 +2885,6 @@ config X86_DEV_DMA_OPS bool depends on X86_64 || STA2X11 -config X86_DMA_REMAP - bool - depends on STA2X11 - config HAVE_GENERIC_GUP def_bool y -- GitLab From eecec78f777742903ec9167490c625661284155d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:10 -0400 Subject: [PATCH 0882/1861] locking/rwsem: Relocate rwsem_down_read_failed() The rwsem_down_read_failed*() functions were relocated from above the optimistic spinning section to below that section. This enables the reader functions to use optimistic spinning in future patches. There is no code change. 
Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 172 ++++++++++++++++++------------------ 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index fbe96341beeed..0d518b52ade4e 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -224,92 +224,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, atomic_long_add(adjustment, &sem->count); } -/* - * Wait for the read lock to be granted - */ -static inline struct rw_semaphore __sched * -__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) -{ - long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; - struct rwsem_waiter waiter; - DEFINE_WAKE_Q(wake_q); - - waiter.task = current; - waiter.type = RWSEM_WAITING_FOR_READ; - - raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) { - /* - * In case the wait queue is empty and the lock isn't owned - * by a writer, this reader can exit the slowpath and return - * immediately as its RWSEM_ACTIVE_READ_BIAS has already - * been set in the count. - */ - if (atomic_long_read(&sem->count) >= 0) { - raw_spin_unlock_irq(&sem->wait_lock); - return sem; - } - adjustment += RWSEM_WAITING_BIAS; - } - list_add_tail(&waiter.list, &sem->wait_list); - - /* we're now waiting on the lock, but no longer actively locking */ - count = atomic_long_add_return(adjustment, &sem->count); - - /* - * If there are no active locks, wake the front queued process(es). - * - * If there are no writers and we are first in the queue, - * wake our own waiter to join the existing active readers ! 
- */ - if (count == RWSEM_WAITING_BIAS || - (count > RWSEM_WAITING_BIAS && - adjustment != -RWSEM_ACTIVE_READ_BIAS)) - __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); - - raw_spin_unlock_irq(&sem->wait_lock); - wake_up_q(&wake_q); - - /* wait to be given the lock */ - while (true) { - set_current_state(state); - if (!waiter.task) - break; - if (signal_pending_state(state, current)) { - raw_spin_lock_irq(&sem->wait_lock); - if (waiter.task) - goto out_nolock; - raw_spin_unlock_irq(&sem->wait_lock); - break; - } - schedule(); - } - - __set_current_state(TASK_RUNNING); - return sem; -out_nolock: - list_del(&waiter.list); - if (list_empty(&sem->wait_list)) - atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); - raw_spin_unlock_irq(&sem->wait_lock); - __set_current_state(TASK_RUNNING); - return ERR_PTR(-EINTR); -} - -__visible struct rw_semaphore * __sched -rwsem_down_read_failed(struct rw_semaphore *sem) -{ - return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL(rwsem_down_read_failed); - -__visible struct rw_semaphore * __sched -rwsem_down_read_failed_killable(struct rw_semaphore *sem) -{ - return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); -} -EXPORT_SYMBOL(rwsem_down_read_failed_killable); - /* * This function must be called with the sem->wait_lock held to prevent * race conditions between checking the rwsem wait list and setting the @@ -504,6 +418,92 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem) } #endif +/* + * Wait for the read lock to be granted + */ +static inline struct rw_semaphore __sched * +__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) +{ + long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; + struct rwsem_waiter waiter; + DEFINE_WAKE_Q(wake_q); + + waiter.task = current; + waiter.type = RWSEM_WAITING_FOR_READ; + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) { + /* + * In case the wait queue is empty and the lock isn't owned + * by a writer, this reader can exit the slowpath and return + * immediately as its RWSEM_ACTIVE_READ_BIAS has already + * been set in the count. + */ + if (atomic_long_read(&sem->count) >= 0) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + adjustment += RWSEM_WAITING_BIAS; + } + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = atomic_long_add_return(adjustment, &sem->count); + + /* + * If there are no active locks, wake the front queued process(es). + * + * If there are no writers and we are first in the queue, + * wake our own waiter to join the existing active readers ! 
+ */ + if (count == RWSEM_WAITING_BIAS || + (count > RWSEM_WAITING_BIAS && + adjustment != -RWSEM_ACTIVE_READ_BIAS)) + __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); + + raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); + + /* wait to be given the lock */ + while (true) { + set_current_state(state); + if (!waiter.task) + break; + if (signal_pending_state(state, current)) { + raw_spin_lock_irq(&sem->wait_lock); + if (waiter.task) + goto out_nolock; + raw_spin_unlock_irq(&sem->wait_lock); + break; + } + schedule(); + } + + __set_current_state(TASK_RUNNING); + return sem; +out_nolock: + list_del(&waiter.list); + if (list_empty(&sem->wait_list)) + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); + raw_spin_unlock_irq(&sem->wait_lock); + __set_current_state(TASK_RUNNING); + return ERR_PTR(-EINTR); +} + +__visible struct rw_semaphore * __sched +rwsem_down_read_failed(struct rw_semaphore *sem) +{ + return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(rwsem_down_read_failed); + +__visible struct rw_semaphore * __sched +rwsem_down_read_failed_killable(struct rw_semaphore *sem) +{ + return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); +} +EXPORT_SYMBOL(rwsem_down_read_failed_killable); + /* * Wait until we successfully acquire the write lock */ -- GitLab From c7580c1e84435c9ccc6c612d9fee8e71811f7be6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:11 -0400 Subject: [PATCH 0883/1861] locking/rwsem: Move owner setting code from rwsem.c to rwsem.h Move all the owner setting code closer to the rwsem-xadd fast paths directly within rwsem.h file as well as in the slowpaths where owner setting is done after acquring the lock. This will enable us to add DEBUG_RWSEMS check in a later patch to make sure that read lock is really acquired when rwsem_down_read_failed() returns, for instance. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-3-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 6 +++--- kernel/locking/rwsem.c | 19 ++----------------- kernel/locking/rwsem.h | 17 +++++++++++++++-- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 0d518b52ade4e..c213869e1aa75 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -176,9 +176,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, goto try_reader_grant; } /* - * It is not really necessary to set it to reader-owned here, - * but it gives the spinners an early indication that the - * readers now have the lock. + * Set it to reader-owned to give spinners an early + * indication that readers now have the lock. 
*/ __rwsem_set_reader_owned(sem, waiter->task); } @@ -441,6 +440,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) */ if (atomic_long_read(&sem->count) >= 0) { raw_spin_unlock_irq(&sem->wait_lock); + rwsem_set_reader_owned(sem); return sem; } adjustment += RWSEM_WAITING_BIAS; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e586f0d03ad38..59e584895532d 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem) rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); - rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read); @@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem) return -EINTR; } - rwsem_set_reader_owned(sem); return 0; } @@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem) { int ret = __down_read_trylock(sem); - if (ret == 1) { + if (ret == 1) rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); - rwsem_set_reader_owned(sem); - } return ret; } @@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem) rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); - rwsem_set_owner(sem); } EXPORT_SYMBOL(down_write); @@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem) return -EINTR; } - rwsem_set_owner(sem); return 0; } @@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem) { int ret = __down_write_trylock(sem); - if (ret == 1) { + if (ret == 1) rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); - rwsem_set_owner(sem); - } return ret; } @@ -119,7 +111,6 @@ void up_read(struct rw_semaphore *sem) rwsem_release(&sem->dep_map, 1, _RET_IP_); DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); - rwsem_clear_reader_owned(sem); __up_read(sem); } @@ -133,7 +124,6 @@ void up_write(struct rw_semaphore *sem) rwsem_release(&sem->dep_map, 1, _RET_IP_); DEBUG_RWSEMS_WARN_ON(sem->owner != current); - rwsem_clear_owner(sem); __up_write(sem); } @@ -147,7 +137,6 @@ void downgrade_write(struct rw_semaphore *sem) lock_downgrade(&sem->dep_map, _RET_IP_); DEBUG_RWSEMS_WARN_ON(sem->owner != current); - rwsem_set_reader_owned(sem); __downgrade_write(sem); } @@ -161,7 +150,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); - rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read_nested); @@ -172,7 +160,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); - rwsem_set_owner(sem); } EXPORT_SYMBOL(_down_write_nest_lock); @@ -193,7 +180,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); - rwsem_set_owner(sem); } EXPORT_SYMBOL(down_write_nested); @@ -208,7 +194,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) return -EINTR; } - rwsem_set_owner(sem); return 0; } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 1f5775aa6a1df..ee24c4f257a52 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -160,6 +160,8 @@ static inline void __down_read(struct rw_semaphore *sem) { if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) 
rwsem_down_read_failed(sem); + else + rwsem_set_reader_owned(sem); } static inline int __down_read_killable(struct rw_semaphore *sem) @@ -167,8 +169,9 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; + } else { + rwsem_set_reader_owned(sem); } - return 0; } @@ -182,6 +185,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { + rwsem_set_reader_owned(sem); return 1; } } while (tmp >= 0); @@ -199,6 +203,7 @@ static inline void __down_write(struct rw_semaphore *sem) &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) rwsem_down_write_failed(sem); + rwsem_set_owner(sem); } static inline int __down_write_killable(struct rw_semaphore *sem) @@ -210,6 +215,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; + rwsem_set_owner(sem); return 0; } @@ -219,7 +225,11 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); - return tmp == RWSEM_UNLOCKED_VALUE; + if (tmp == RWSEM_UNLOCKED_VALUE) { + rwsem_set_owner(sem); + return true; + } + return false; } /* @@ -229,6 +239,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; + rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) rwsem_wake(sem); @@ -239,6 +250,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) rwsem_wake(sem); @@ -259,6 +271,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * write side. As such, rely on RELEASE semantics. */ tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); + rwsem_set_reader_owned(sem); if (tmp < 0) rwsem_downgrade_wake(sem); } -- GitLab From 12a30a7fc142a123c61da9623bd824d95d36c12e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:12 -0400 Subject: [PATCH 0884/1861] locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h We don't need to expose rwsem internal functions which are not supposed to be called directly from other kernel code. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-4-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 7 ------- kernel/locking/rwsem.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 0fc41062c6495..b44e533235c73 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -46,13 +46,6 @@ struct rw_semaphore { */ #define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index ee24c4f257a52..19997c82270b6 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -153,6 +153,13 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif +extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); + /* * lock for reading */ -- GitLab From a338ecb07a338c9a8b0ca0010e862ebe598b1551 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:13 -0400 Subject: [PATCH 0885/1861] locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued() The atomic_long_cmpxchg_acquire() in rwsem_try_read_lock_unqueued() is replaced by atomic_long_try_cmpxchg_acquire() to simplify the code and generate slightly better assembly code. There is no functional change. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-5-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index c213869e1aa75..f6198e1a58f6b 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -259,21 +259,16 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = atomic_long_read(&sem->count); + long count = atomic_long_read(&sem->count); - while (true) { - if (!(count == 0 || count == RWSEM_WAITING_BIAS)) - return false; - - old = atomic_long_cmpxchg_acquire(&sem->count, count, - count + RWSEM_ACTIVE_WRITE_BIAS); - if (old == count) { + while (!count || count == RWSEM_WAITING_BIAS) { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &count, + count + RWSEM_ACTIVE_WRITE_BIAS)) { rwsem_set_owner(sem); return true; } - - count = old; } + return false; } static inline bool owner_on_cpu(struct task_struct *owner) -- GitLab From a68e2c4c637918da47b3aa270051545cff7d8245 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:14 -0400 Subject: [PATCH 0886/1861] locking/rwsem: Add debug check for __down_read*() When rwsem_down_read_failed*() return, the read lock is acquired indirectly by others. So debug checks are added in __down_read() and __down_read_killable() to make sure the rwsem is really reader-owned. The other debug check calls in kernel/locking/rwsem.c except the one in up_read_non_owner() are also moved over to rwsem-xadd.h. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-6-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.c | 3 --- kernel/locking/rwsem.h | 12 ++++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 59e584895532d..90de5f1780bac 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -109,7 +109,6 @@ EXPORT_SYMBOL(down_write_trylock); void up_read(struct rw_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); __up_read(sem); } @@ -122,7 +121,6 @@ EXPORT_SYMBOL(up_read); void up_write(struct rw_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); - DEBUG_RWSEMS_WARN_ON(sem->owner != current); __up_write(sem); } @@ -135,7 +133,6 @@ EXPORT_SYMBOL(up_write); void downgrade_write(struct rw_semaphore *sem) { lock_downgrade(&sem->dep_map, _RET_IP_); - DEBUG_RWSEMS_WARN_ON(sem->owner != current); __downgrade_write(sem); } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 19997c82270b6..1d8f722a6761b 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -165,10 +165,13 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); */ static inline void __down_read(struct rw_semaphore *sem) { - if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { rwsem_down_read_failed(sem); - else + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & + RWSEM_READER_OWNED)); + } else { rwsem_set_reader_owned(sem); + } } static inline int __down_read_killable(struct rw_semaphore *sem) @@ -176,6 +179,8 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & + RWSEM_READER_OWNED)); } else { rwsem_set_reader_owned(sem); } @@ -246,6 +251,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) @@ -257,6 +263,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + DEBUG_RWSEMS_WARN_ON(sem->owner != current); rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) @@ -277,6 +284,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ + DEBUG_RWSEMS_WARN_ON(sem->owner != current); tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); rwsem_set_reader_owned(sem); if (tmp < 0) -- GitLab From 3b4ba6643d26a95e08067fca9a5da1828f9afabf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:15 -0400 Subject: [PATCH 0887/1861] locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro Currently, the DEBUG_RWSEMS_WARN_ON() macro just dumps a stack trace when the rwsem isn't in the right state. It does not show the actual states of the rwsem. This may not be that helpful in the debugging process. 
Enhance the DEBUG_RWSEMS_WARN_ON() macro to also show the current content of the rwsem count and owner fields to give more information about what is wrong with the rwsem. The debug_locks_off() function is called as is done inside DEBUG_LOCKS_WARN_ON(). Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-7-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.c | 3 ++- kernel/locking/rwsem.h | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 90de5f1780bac..ccbf18f560ff1 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -198,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested); void up_read_non_owner(struct rw_semaphore *sem) { - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), + sem); __up_read(sem); } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 1d8f722a6761b..3059a2dc39f8e 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -27,9 +27,15 @@ #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) #ifdef CONFIG_DEBUG_RWSEMS -# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) +# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ + if (WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + #c, atomic_long_read(&(sem)->count), \ + (long)((sem)->owner), (long)current, \ + list_empty(&(sem)->wait_list) ? "" : "not ")) \ + debug_locks_off(); \ + } while (0) #else -# define DEBUG_RWSEMS_WARN_ON(c) +# define DEBUG_RWSEMS_WARN_ON(c, sem) #endif /* @@ -168,7 +174,7 @@ static inline void __down_read(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { rwsem_down_read_failed(sem); DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED)); + RWSEM_READER_OWNED), sem); } else { rwsem_set_reader_owned(sem); } @@ -180,7 +186,7 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED)); + RWSEM_READER_OWNED), sem); } else { rwsem_set_reader_owned(sem); } @@ -251,7 +257,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), + sem); rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) @@ -263,7 +270,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { - DEBUG_RWSEMS_WARN_ON(sem->owner != current); + DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) @@ -284,7 +291,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. 
*/ - DEBUG_RWSEMS_WARN_ON(sem->owner != current); + DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); rwsem_set_reader_owned(sem); if (tmp < 0) -- GitLab From ad53fa10fa9e816067bbae7109845940f5e6df50 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:16 -0400 Subject: [PATCH 0888/1861] locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs The percpu event counts used by qspinlock code can be useful for other locking code as well. So a new set of lockevent_* counting APIs is introduced with the lock event names extracted out into the new lock_events_list.h header file for easier addition in the future. The existing qstat_inc() calls are replaced by either lockevent_inc() or lockevent_cond_inc() calls. The qstat_hop() call is renamed to lockevent_pv_hop(). The "reset_counters" debugfs file is also renamed to ".reset_counts". Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-8-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/lock_events.h | 55 ++++++++++ kernel/locking/lock_events_list.h | 50 +++++++++ kernel/locking/qspinlock.c | 8 +- kernel/locking/qspinlock_paravirt.h | 19 ++-- kernel/locking/qspinlock_stat.h | 163 +++++++++++----------------- 5 files changed, 181 insertions(+), 114 deletions(-) create mode 100644 kernel/locking/lock_events.h create mode 100644 kernel/locking/lock_events_list.h diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h new file mode 100644 index 0000000000000..4009e07b474a8 --- /dev/null +++ b/kernel/locking/lock_events.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Authors: Waiman Long + */ + +enum lock_events { + +#include "lock_events_list.h" + + lockevent_num, /* Total number of lock event counts */ + LOCKEVENT_reset_cnts = lockevent_num, +}; + +#ifdef CONFIG_QUEUED_LOCK_STAT +/* + * Per-cpu counters + */ +DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]); + +/* + * Increment the PV qspinlock statistical counters + */ +static inline void __lockevent_inc(enum lock_events event, bool cond) +{ + if (cond) + __this_cpu_inc(lockevents[event]); +} + +#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true) +#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c) + +static inline void __lockevent_add(enum lock_events event, int inc) +{ + __this_cpu_add(lockevents[event], inc); +} + +#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c) + +#else /* CONFIG_QUEUED_LOCK_STAT */ + +#define lockevent_inc(ev) +#define lockevent_add(ev, c) +#define lockevent_cond_inc(ev, c) + +#endif /* CONFIG_QUEUED_LOCK_STAT */ diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h new file mode 100644 index 0000000000000..8b4d2e180475e --- /dev/null +++ b/kernel/locking/lock_events_list.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Waiman Long + */ + +#ifndef LOCK_EVENT +#define LOCK_EVENT(name) LOCKEVENT_ ## name, +#endif + +#ifdef CONFIG_QUEUED_SPINLOCKS +#ifdef CONFIG_PARAVIRT_SPINLOCKS +/* + * Locking events for PV qspinlock. + */ +LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */ +LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */ +LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */ +LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */ +LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */ +LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */ +LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */ +LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */ +LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */ +LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */ +LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */ +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +/* + * Locking events for qspinlock + * + * Subtracting lock_use_node[234] from lock_slowpath will give you + * lock_use_node1. 
+ */ +LOCK_EVENT(lock_pending) /* # of locking ops via pending code */ +LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */ +LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */ +LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */ +LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */ +LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */ +#endif /* CONFIG_QUEUED_SPINLOCKS */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5e9247dc25158..e14b32c69639e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * 0,1,0 -> 0,0,1 */ clear_pending_set_locked(lock); - qstat_inc(qstat_lock_pending, true); + lockevent_inc(lock_pending); return; /* @@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * queuing. */ queue: - qstat_inc(qstat_lock_slowpath, true); + lockevent_inc(lock_slowpath); pv_queue: node = this_cpu_ptr(&qnodes[0].mcs); idx = node->count++; @@ -419,7 +419,7 @@ pv_queue: * simple enough. */ if (unlikely(idx >= MAX_NODES)) { - qstat_inc(qstat_lock_no_node, true); + lockevent_inc(lock_no_node); while (!queued_spin_trylock(lock)) cpu_relax(); goto release; @@ -430,7 +430,7 @@ pv_queue: /* * Keep counts of non-zero index values: */ - qstat_inc(qstat_lock_use_node2 + idx - 1, idx); + lockevent_cond_inc(lock_use_node2 + idx - 1, idx); /* * Ensure that we increment the head node->count before initialising diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 8f36c27c17948..89bab079e7a4d 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) if (!(val & _Q_LOCKED_PENDING_MASK) && (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { - qstat_inc(qstat_pv_lock_stealing, true); + lockevent_inc(pv_lock_stealing); return true; } if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) @@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node) hopcnt++; if (!cmpxchg(&he->lock, NULL, lock)) { WRITE_ONCE(he->node, node); - qstat_hop(hopcnt); + lockevent_pv_hop(hopcnt); return &he->lock; } } @@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) smp_store_mb(pn->state, vcpu_halted); if (!READ_ONCE(node->locked)) { - qstat_inc(qstat_pv_wait_node, true); - qstat_inc(qstat_pv_wait_early, wait_early); + lockevent_inc(pv_wait_node); + lockevent_cond_inc(pv_wait_early, wait_early); pv_wait(&pn->state, vcpu_halted); } @@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) * So it is better to spin for a while in the hope that the * MCS lock will be released soon. 
*/ - qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked)); + lockevent_cond_inc(pv_spurious_wakeup, + !READ_ONCE(node->locked)); } /* @@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) /* * Tracking # of slowpath locking operations */ - qstat_inc(qstat_lock_slowpath, true); + lockevent_inc(lock_slowpath); for (;; waitcnt++) { /* @@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) } } WRITE_ONCE(pn->state, vcpu_hashed); - qstat_inc(qstat_pv_wait_head, true); - qstat_inc(qstat_pv_wait_again, waitcnt); + lockevent_inc(pv_wait_head); + lockevent_cond_inc(pv_wait_again, waitcnt); pv_wait(&lock->locked, _Q_SLOW_VAL); /* @@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) * vCPU is harmless other than the additional latency in completing * the unlock. */ - qstat_inc(qstat_pv_kick_unlock, true); + lockevent_inc(pv_kick_unlock); pv_kick(node->cpu); } diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index d73f85388d5c1..1db5b375fcf49 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -38,8 +38,8 @@ * Subtracting lock_use_node[234] from lock_slowpath will give you * lock_use_node1. * - * Writing to the "reset_counters" file will reset all the above counter - * values. + * Writing to the special ".reset_counts" file will reset all the above + * counter values. * * These statistical counters are implemented as per-cpu variables which are * summed and computed whenever the corresponding debugfs files are read. This @@ -48,27 +48,7 @@ * * There may be slight difference between pv_kick_wake and pv_kick_unlock. */ -enum qlock_stats { - qstat_pv_hash_hops, - qstat_pv_kick_unlock, - qstat_pv_kick_wake, - qstat_pv_latency_kick, - qstat_pv_latency_wake, - qstat_pv_lock_stealing, - qstat_pv_spurious_wakeup, - qstat_pv_wait_again, - qstat_pv_wait_early, - qstat_pv_wait_head, - qstat_pv_wait_node, - qstat_lock_pending, - qstat_lock_slowpath, - qstat_lock_use_node2, - qstat_lock_use_node3, - qstat_lock_use_node4, - qstat_lock_no_node, - qstat_num, /* Total number of statistical counters */ - qstat_reset_cnts = qstat_num, -}; +#include "lock_events.h" #ifdef CONFIG_QUEUED_LOCK_STAT /* @@ -79,99 +59,91 @@ enum qlock_stats { #include #include -static const char * const qstat_names[qstat_num + 1] = { - [qstat_pv_hash_hops] = "pv_hash_hops", - [qstat_pv_kick_unlock] = "pv_kick_unlock", - [qstat_pv_kick_wake] = "pv_kick_wake", - [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", - [qstat_pv_latency_kick] = "pv_latency_kick", - [qstat_pv_latency_wake] = "pv_latency_wake", - [qstat_pv_lock_stealing] = "pv_lock_stealing", - [qstat_pv_wait_again] = "pv_wait_again", - [qstat_pv_wait_early] = "pv_wait_early", - [qstat_pv_wait_head] = "pv_wait_head", - [qstat_pv_wait_node] = "pv_wait_node", - [qstat_lock_pending] = "lock_pending", - [qstat_lock_slowpath] = "lock_slowpath", - [qstat_lock_use_node2] = "lock_use_node2", - [qstat_lock_use_node3] = "lock_use_node3", - [qstat_lock_use_node4] = "lock_use_node4", - [qstat_lock_no_node] = "lock_no_node", - [qstat_reset_cnts] = "reset_counters", +#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev] + +#undef LOCK_EVENT +#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name, + +static const char * const lockevent_names[lockevent_num + 1] = { + +#include "lock_events_list.h" + + [LOCKEVENT_reset_cnts] = ".reset_counts", }; /* * Per-cpu counters */ -static DEFINE_PER_CPU(unsigned long, 
qstats[qstat_num]); +DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]); static DEFINE_PER_CPU(u64, pv_kick_time); /* * Function to read and return the qlock statistical counter values * * The following counters are handled specially: - * 1. qstat_pv_latency_kick + * 1. pv_latency_kick * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock - * 2. qstat_pv_latency_wake + * 2. pv_latency_wake * Average wake latency (ns) = pv_latency_wake/pv_kick_wake - * 3. qstat_pv_hash_hops + * 3. pv_hash_hops * Average hops/hash = pv_hash_hops/pv_kick_unlock */ -static ssize_t qstat_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t lockevent_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) { char buf[64]; - int cpu, counter, len; - u64 stat = 0, kicks = 0; + int cpu, id, len; + u64 sum = 0, kicks = 0; /* * Get the counter ID stored in file->f_inode->i_private */ - counter = (long)file_inode(file)->i_private; + id = (long)file_inode(file)->i_private; - if (counter >= qstat_num) + if (id >= lockevent_num) return -EBADF; for_each_possible_cpu(cpu) { - stat += per_cpu(qstats[counter], cpu); + sum += per_cpu(lockevents[id], cpu); /* - * Need to sum additional counter for some of them + * Need to sum additional counters for some of them */ - switch (counter) { + switch (id) { - case qstat_pv_latency_kick: - case qstat_pv_hash_hops: - kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu); + case LOCKEVENT_pv_latency_kick: + case LOCKEVENT_pv_hash_hops: + kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu); break; - case qstat_pv_latency_wake: - kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu); + case LOCKEVENT_pv_latency_wake: + kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu); break; } } - if (counter == qstat_pv_hash_hops) { + if (id == LOCKEVENT_pv_hash_hops) { u64 frac = 0; if (kicks) { - frac = 100ULL * do_div(stat, kicks); + frac = 100ULL * do_div(sum, kicks); frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); } /* * Return a X.XX decimal number */ - len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac); + len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", + sum, frac); } else { /* * Round to the nearest ns */ - if ((counter == qstat_pv_latency_kick) || - (counter == qstat_pv_latency_wake)) { + if ((id == LOCKEVENT_pv_latency_kick) || + (id == LOCKEVENT_pv_latency_wake)) { if (kicks) - stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); + sum = DIV_ROUND_CLOSEST_ULL(sum, kicks); } - len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat); + len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); } return simple_read_from_buffer(user_buf, count, ppos, buf, len); @@ -180,11 +152,9 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, /* * Function to handle write request * - * When counter = reset_cnts, reset all the counter values. - * Since the counter updates aren't atomic, the resetting is done twice - * to make sure that the counters are very likely to be all cleared. + * When id = .reset_cnts, reset all the counter values. 
*/ -static ssize_t qstat_write(struct file *file, const char __user *user_buf, +static ssize_t lockevent_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { int cpu; @@ -192,14 +162,14 @@ static ssize_t qstat_write(struct file *file, const char __user *user_buf, /* * Get the counter ID stored in file->f_inode->i_private */ - if ((long)file_inode(file)->i_private != qstat_reset_cnts) + if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts) return count; for_each_possible_cpu(cpu) { int i; - unsigned long *ptr = per_cpu_ptr(qstats, cpu); + unsigned long *ptr = per_cpu_ptr(lockevents, cpu); - for (i = 0 ; i < qstat_num; i++) + for (i = 0 ; i < lockevent_num; i++) WRITE_ONCE(ptr[i], 0); } return count; @@ -208,9 +178,9 @@ static ssize_t qstat_write(struct file *file, const char __user *user_buf, /* * Debugfs data structures */ -static const struct file_operations fops_qstat = { - .read = qstat_read, - .write = qstat_write, +static const struct file_operations fops_lockevent = { + .read = lockevent_read, + .write = lockevent_write, .llseek = default_llseek, }; @@ -219,10 +189,10 @@ static const struct file_operations fops_qstat = { */ static int __init init_qspinlock_stat(void) { - struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL); + struct dentry *d_counts = debugfs_create_dir("qlockstat", NULL); int i; - if (!d_qstat) + if (!d_counts) goto out; /* @@ -232,39 +202,31 @@ static int __init init_qspinlock_stat(void) * root is allowed to do the read/write to limit impact to system * performance. */ - for (i = 0; i < qstat_num; i++) - if (!debugfs_create_file(qstat_names[i], 0400, d_qstat, - (void *)(long)i, &fops_qstat)) + for (i = 0; i < lockevent_num; i++) + if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, + (void *)(long)i, &fops_lockevent)) goto fail_undo; - if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat, - (void *)(long)qstat_reset_cnts, &fops_qstat)) + if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, + d_counts, (void *)(long)LOCKEVENT_reset_cnts, + &fops_lockevent)) goto fail_undo; return 0; fail_undo: - debugfs_remove_recursive(d_qstat); + debugfs_remove_recursive(d_counts); out: pr_warn("Could not create 'qlockstat' debugfs entries\n"); return -ENOMEM; } fs_initcall(init_qspinlock_stat); -/* - * Increment the PV qspinlock statistical counters - */ -static inline void qstat_inc(enum qlock_stats stat, bool cond) -{ - if (cond) - this_cpu_inc(qstats[stat]); -} - /* * PV hash hop count */ -static inline void qstat_hop(int hopcnt) +static inline void lockevent_pv_hop(int hopcnt) { - this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt); + this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt); } /* @@ -276,7 +238,7 @@ static inline void __pv_kick(int cpu) per_cpu(pv_kick_time, cpu) = start; pv_kick(cpu); - this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start); + this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start); } /* @@ -289,9 +251,9 @@ static inline void __pv_wait(u8 *ptr, u8 val) *pkick_time = 0; pv_wait(ptr, val); if (*pkick_time) { - this_cpu_add(qstats[qstat_pv_latency_wake], + this_cpu_add(EVENT_COUNT(pv_latency_wake), sched_clock() - *pkick_time); - qstat_inc(qstat_pv_kick_wake, true); + lockevent_inc(pv_kick_wake); } } @@ -300,7 +262,6 @@ static inline void __pv_wait(u8 *ptr, u8 val) #else /* CONFIG_QUEUED_LOCK_STAT */ -static inline void qstat_inc(enum qlock_stats stat, bool cond) { } -static inline void qstat_hop(int hopcnt) { } +static inline void 
lockevent_pv_hop(int hopcnt) { } #endif /* CONFIG_QUEUED_LOCK_STAT */ -- GitLab From fb346fd9fc081c3d978c3f3d26d39334527a2662 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:17 -0400 Subject: [PATCH 0889/1861] locking/lock_events: Make lock_events available for all archs & other locks The QUEUED_LOCK_STAT option to report queued spinlocks event counts was previously allowed only on x86 architecture. To make the locking event counting code more useful, it is now renamed to a more generic LOCK_EVENT_COUNTS config option. This new option will be available to all the architectures that use qspinlock at the moment. Other locking code can now start to use the generic locking event counting code by including lock_events.h and put the new locking event names into the lock_events_list.h header file. My experience with lock event counting is that it gives valuable insight on how the locking code works and what can be done to make it better. I would like to extend this benefit to other locking code like mutex and rwsem in the near future. The PV qspinlock specific code will stay in qspinlock_stat.h. The locking event counters will now reside in the /lock_event_counts directory. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-9-longman@redhat.com Signed-off-by: Ingo Molnar --- arch/Kconfig | 10 +++ arch/x86/Kconfig | 8 -- kernel/locking/Makefile | 1 + kernel/locking/lock_events.c | 153 ++++++++++++++++++++++++++++++++ kernel/locking/lock_events.h | 10 ++- kernel/locking/qspinlock_stat.h | 141 +++-------------------------- 6 files changed, 183 insertions(+), 140 deletions(-) create mode 100644 kernel/locking/lock_events.c diff --git a/arch/Kconfig b/arch/Kconfig index 33687dddd86a7..28c0f1ad80d75 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -901,6 +901,16 @@ config HAVE_ARCH_PREL32_RELOCATIONS config ARCH_USE_MEMREMAP_PROT bool +config LOCK_EVENT_COUNTS + bool "Locking event counts collection" + depends on DEBUG_FS + depends on QUEUED_SPINLOCKS + ---help--- + Enable light-weight counting of various locking related events + in the system with minimal performance impact. This reduces + the chance of application behavior change because of timing + differences. The counts are reported via debugfs. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 84b184e016cd4..7d160f58a8f68 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -780,14 +780,6 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer Y. -config QUEUED_LOCK_STAT - bool "Paravirt queued spinlock statistics" - depends on PARAVIRT_SPINLOCKS && DEBUG_FS - ---help--- - Enable the collection of statistical data on the slowpath - behavior of paravirtualized queued spinlocks and report - them on debugfs. 
- source "arch/x86/xen/Kconfig" config KVM_GUEST diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 1af83e9ce57d5..6fe2f333aecb5 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o +obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c new file mode 100644 index 0000000000000..71c36d1fb8341 --- /dev/null +++ b/kernel/locking/lock_events.c @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Waiman Long + */ + +/* + * Collect locking event counts + */ +#include +#include +#include +#include + +#include "lock_events.h" + +#undef LOCK_EVENT +#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name, + +#define LOCK_EVENTS_DIR "lock_event_counts" + +/* + * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different + * types of locks will be reported under the /lock_event_counts/ + * directory. See lock_events_list.h for the list of available locking + * events. + * + * Writing to the special ".reset_counts" file will reset all the above + * locking event counts. This is a very slow operation and so should not + * be done frequently. + * + * These event counts are implemented as per-cpu variables which are + * summed and computed whenever the corresponding debugfs files are read. This + * minimizes added overhead making the counts usable even in a production + * environment. + */ +static const char * const lockevent_names[lockevent_num + 1] = { + +#include "lock_events_list.h" + + [LOCKEVENT_reset_cnts] = ".reset_counts", +}; + +/* + * Per-cpu counts + */ +DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]); + +/* + * The lockevent_read() function can be overridden. + */ +ssize_t __weak lockevent_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[64]; + int cpu, id, len; + u64 sum = 0; + + /* + * Get the counter ID stored in file->f_inode->i_private + */ + id = (long)file_inode(file)->i_private; + + if (id >= lockevent_num) + return -EBADF; + + for_each_possible_cpu(cpu) + sum += per_cpu(lockevents[id], cpu); + len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +/* + * Function to handle write request + * + * When idx = reset_cnts, reset all the counts. 
+ */ +static ssize_t lockevent_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int cpu; + + /* + * Get the counter ID stored in file->f_inode->i_private + */ + if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts) + return count; + + for_each_possible_cpu(cpu) { + int i; + unsigned long *ptr = per_cpu_ptr(lockevents, cpu); + + for (i = 0 ; i < lockevent_num; i++) + WRITE_ONCE(ptr[i], 0); + } + return count; +} + +/* + * Debugfs data structures + */ +static const struct file_operations fops_lockevent = { + .read = lockevent_read, + .write = lockevent_write, + .llseek = default_llseek, +}; + +/* + * Initialize debugfs for the locking event counts. + */ +static int __init init_lockevent_counts(void) +{ + struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL); + int i; + + if (!d_counts) + goto out; + + /* + * Create the debugfs files + * + * As reading from and writing to the stat files can be slow, only + * root is allowed to do the read/write to limit impact to system + * performance. + */ + for (i = 0; i < lockevent_num; i++) + if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, + (void *)(long)i, &fops_lockevent)) + goto fail_undo; + + if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, + d_counts, (void *)(long)LOCKEVENT_reset_cnts, + &fops_lockevent)) + goto fail_undo; + + return 0; +fail_undo: + debugfs_remove_recursive(d_counts); +out: + pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR); + return -ENOMEM; +} +fs_initcall(init_lockevent_counts); diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h index 4009e07b474a8..feb1acc546111 100644 --- a/kernel/locking/lock_events.h +++ b/kernel/locking/lock_events.h @@ -13,6 +13,9 @@ * Authors: Waiman Long */ +#ifndef __LOCKING_LOCK_EVENTS_H +#define __LOCKING_LOCK_EVENTS_H + enum lock_events { #include "lock_events_list.h" @@ -21,7 +24,7 @@ enum lock_events { LOCKEVENT_reset_cnts = lockevent_num, }; -#ifdef CONFIG_QUEUED_LOCK_STAT +#ifdef CONFIG_LOCK_EVENT_COUNTS /* * Per-cpu counters */ @@ -46,10 +49,11 @@ static inline void __lockevent_add(enum lock_events event, int inc) #define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c) -#else /* CONFIG_QUEUED_LOCK_STAT */ +#else /* CONFIG_LOCK_EVENT_COUNTS */ #define lockevent_inc(ev) #define lockevent_add(ev, c) #define lockevent_cond_inc(ev, c) -#endif /* CONFIG_QUEUED_LOCK_STAT */ +#endif /* CONFIG_LOCK_EVENT_COUNTS */ +#endif /* __LOCKING_LOCK_EVENTS_H */ diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index 1db5b375fcf49..54152670ff248 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -9,76 +9,29 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* - * Authors: Waiman Long + * Authors: Waiman Long */ -/* - * When queued spinlock statistical counters are enabled, the following - * debugfs files will be created for reporting the counter values: - * - * /qlockstat/ - * pv_hash_hops - average # of hops per hashing operation - * pv_kick_unlock - # of vCPU kicks issued at unlock time - * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake - * pv_latency_kick - average latency (ns) of vCPU kick operation - * pv_latency_wake - average latency (ns) from vCPU kick to wakeup - * pv_lock_stealing - # of lock stealing operations - * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs - * pv_wait_again - # of wait's after a queue head vCPU kick - * pv_wait_early - # of early vCPU wait's - * pv_wait_head - # of vCPU wait's at the queue head - * pv_wait_node - # of vCPU wait's at a non-head queue node - * lock_pending - # of locking operations via pending code - * lock_slowpath - # of locking operations via MCS lock queue - * lock_use_node2 - # of locking operations that use 2nd per-CPU node - * lock_use_node3 - # of locking operations that use 3rd per-CPU node - * lock_use_node4 - # of locking operations that use 4th per-CPU node - * lock_no_node - # of locking operations without using per-CPU node - * - * Subtracting lock_use_node[234] from lock_slowpath will give you - * lock_use_node1. - * - * Writing to the special ".reset_counts" file will reset all the above - * counter values. - * - * These statistical counters are implemented as per-cpu variables which are - * summed and computed whenever the corresponding debugfs files are read. This - * minimizes added overhead making the counters usable even in a production - * environment. - * - * There may be slight difference between pv_kick_wake and pv_kick_unlock. - */ #include "lock_events.h" -#ifdef CONFIG_QUEUED_LOCK_STAT +#ifdef CONFIG_LOCK_EVENT_COUNTS +#ifdef CONFIG_PARAVIRT_SPINLOCKS /* - * Collect pvqspinlock statistics + * Collect pvqspinlock locking event counts */ -#include #include #include #include #define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev] -#undef LOCK_EVENT -#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name, - -static const char * const lockevent_names[lockevent_num + 1] = { - -#include "lock_events_list.h" - - [LOCKEVENT_reset_cnts] = ".reset_counts", -}; - /* - * Per-cpu counters + * PV specific per-cpu counter */ -DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]); static DEFINE_PER_CPU(u64, pv_kick_time); /* - * Function to read and return the qlock statistical counter values + * Function to read and return the PV qspinlock counts. * * The following counters are handled specially: * 1. pv_latency_kick @@ -88,8 +41,8 @@ static DEFINE_PER_CPU(u64, pv_kick_time); * 3. pv_hash_hops * Average hops/hash = pv_hash_hops/pv_kick_unlock */ -static ssize_t lockevent_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +ssize_t lockevent_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) { char buf[64]; int cpu, id, len; @@ -149,78 +102,6 @@ static ssize_t lockevent_read(struct file *file, char __user *user_buf, return simple_read_from_buffer(user_buf, count, ppos, buf, len); } -/* - * Function to handle write request - * - * When id = .reset_cnts, reset all the counter values. 
- */ -static ssize_t lockevent_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - int cpu; - - /* - * Get the counter ID stored in file->f_inode->i_private - */ - if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts) - return count; - - for_each_possible_cpu(cpu) { - int i; - unsigned long *ptr = per_cpu_ptr(lockevents, cpu); - - for (i = 0 ; i < lockevent_num; i++) - WRITE_ONCE(ptr[i], 0); - } - return count; -} - -/* - * Debugfs data structures - */ -static const struct file_operations fops_lockevent = { - .read = lockevent_read, - .write = lockevent_write, - .llseek = default_llseek, -}; - -/* - * Initialize debugfs for the qspinlock statistical counters - */ -static int __init init_qspinlock_stat(void) -{ - struct dentry *d_counts = debugfs_create_dir("qlockstat", NULL); - int i; - - if (!d_counts) - goto out; - - /* - * Create the debugfs files - * - * As reading from and writing to the stat files can be slow, only - * root is allowed to do the read/write to limit impact to system - * performance. - */ - for (i = 0; i < lockevent_num; i++) - if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, - (void *)(long)i, &fops_lockevent)) - goto fail_undo; - - if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, - d_counts, (void *)(long)LOCKEVENT_reset_cnts, - &fops_lockevent)) - goto fail_undo; - - return 0; -fail_undo: - debugfs_remove_recursive(d_counts); -out: - pr_warn("Could not create 'qlockstat' debugfs entries\n"); - return -ENOMEM; -} -fs_initcall(init_qspinlock_stat); - /* * PV hash hop count */ @@ -260,8 +141,10 @@ static inline void __pv_wait(u8 *ptr, u8 val) #define pv_kick(c) __pv_kick(c) #define pv_wait(p, v) __pv_wait(p, v) -#else /* CONFIG_QUEUED_LOCK_STAT */ +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#else /* CONFIG_LOCK_EVENT_COUNTS */ static inline void lockevent_pv_hop(int hopcnt) { } -#endif /* CONFIG_QUEUED_LOCK_STAT */ +#endif /* CONFIG_LOCK_EVENT_COUNTS */ -- GitLab From bf20616f46e536fe8affed6f138db4b3040b55a6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:18 -0400 Subject: [PATCH 0890/1861] locking/lock_events: Don't show pvqspinlock events on bare metal On bare metal, the pvqspinlock event counts will always be 0. So there is no point in showing their corresponding debugfs files. So they are skipped in this case. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-10-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/lock_events.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c index 71c36d1fb8341..fa2c2f951c6b5 100644 --- a/kernel/locking/lock_events.c +++ b/kernel/locking/lock_events.c @@ -115,6 +115,29 @@ static const struct file_operations fops_lockevent = { .llseek = default_llseek, }; +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#include + +static bool __init skip_lockevent(const char *name) +{ + static int pv_on __initdata = -1; + + if (pv_on < 0) + pv_on = !pv_is_native_spin_unlock(); + /* + * Skip PV qspinlock events on bare metal. 
+ */ + if (!pv_on && !memcmp(name, "pv_", 3)) + return true; + return false; +} +#else +static inline bool skip_lockevent(const char *name) +{ + return false; +} +#endif + /* * Initialize debugfs for the locking event counts. */ @@ -133,10 +156,13 @@ static int __init init_lockevent_counts(void) * root is allowed to do the read/write to limit impact to system * performance. */ - for (i = 0; i < lockevent_num; i++) + for (i = 0; i < lockevent_num; i++) { + if (skip_lockevent(lockevent_names[i])) + continue; if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, (void *)(long)i, &fops_lockevent)) goto fail_undo; + } if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, d_counts, (void *)(long)LOCKEVENT_reset_cnts, -- GitLab From a8654596f0371c2604c4d475422c48f4fc6a56c9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:19 -0400 Subject: [PATCH 0891/1861] locking/rwsem: Enable lock event counting Add lock event counting calls so that we can track the number of lock events happening in the rwsem code. With CONFIG_LOCK_EVENT_COUNTS on and booting a 4-socket 112-thread x86-64 system, the rwsem counts after system bootup were as follows: rwsem_opt_fail=261 rwsem_opt_wlock=50636 rwsem_rlock=445 rwsem_rlock_fail=0 rwsem_rlock_fast=22 rwsem_rtrylock=810144 rwsem_sleep_reader=441 rwsem_sleep_writer=310 rwsem_wake_reader=355 rwsem_wake_writer=2335 rwsem_wlock=261 rwsem_wlock_fail=0 rwsem_wtrylock=20583 It can be seen that most of the lock acquisitions in the slowpath were write-locks in the optimistic spinning code path with no sleeping at all. For this system, over 97% of the locks are acquired via optimistic spinning. It illustrates the importance of optimistic spinning in improving the performance of rwsem. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-11-longman@redhat.com Signed-off-by: Ingo Molnar --- arch/Kconfig | 1 - kernel/locking/lock_events_list.h | 17 +++++++++++++++++ kernel/locking/rwsem-xadd.c | 11 +++++++++++ kernel/locking/rwsem.h | 4 ++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 28c0f1ad80d75..02cb4e6a3e380 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -904,7 +904,6 @@ config ARCH_USE_MEMREMAP_PROT config LOCK_EVENT_COUNTS bool "Locking event counts collection" depends on DEBUG_FS - depends on QUEUED_SPINLOCKS ---help--- Enable light-weight counting of various locking related events in the system with minimal performance impact. 
This reduces diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h index 8b4d2e180475e..ad7668cfc9dad 100644 --- a/kernel/locking/lock_events_list.h +++ b/kernel/locking/lock_events_list.h @@ -48,3 +48,20 @@ LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */ LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */ LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */ #endif /* CONFIG_QUEUED_SPINLOCKS */ + +/* + * Locking events for rwsem + */ +LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */ +LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */ +LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */ +LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */ +LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */ +LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */ +LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */ +LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */ +LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */ +LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */ +LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */ +LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */ +LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */ diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index f6198e1a58f6b..6b3ee9948bf17 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * will notice the queued writer. */ wake_q_add(wake_q, waiter->task); + lockevent_inc(rwsem_wake_writer); } return; @@ -214,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, } adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + lockevent_cond_inc(rwsem_wake_reader, woken); if (list_empty(&sem->wait_list)) { /* hit end of list above */ adjustment -= RWSEM_WAITING_BIAS; @@ -265,6 +267,7 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) if (atomic_long_try_cmpxchg_acquire(&sem->count, &count, count + RWSEM_ACTIVE_WRITE_BIAS)) { rwsem_set_owner(sem); + lockevent_inc(rwsem_opt_wlock); return true; } } @@ -389,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) osq_unlock(&sem->osq); done: preempt_enable(); + lockevent_cond_inc(rwsem_opt_fail, !taken); return taken; } @@ -436,6 +440,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) if (atomic_long_read(&sem->count) >= 0) { raw_spin_unlock_irq(&sem->wait_lock); rwsem_set_reader_owned(sem); + lockevent_inc(rwsem_rlock_fast); return sem; } adjustment += RWSEM_WAITING_BIAS; @@ -472,9 +477,11 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) break; } schedule(); + lockevent_inc(rwsem_sleep_reader); } __set_current_state(TASK_RUNNING); + lockevent_inc(rwsem_rlock); return sem; out_nolock: list_del(&waiter.list); @@ -482,6 +489,7 @@ out_nolock: atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); raw_spin_unlock_irq(&sem->wait_lock); __set_current_state(TASK_RUNNING); + lockevent_inc(rwsem_rlock_fail); return ERR_PTR(-EINTR); } @@ -575,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) goto out_nolock; schedule(); + lockevent_inc(rwsem_sleep_writer); set_current_state(state); } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); @@ -583,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) 
__set_current_state(TASK_RUNNING); list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); + lockevent_inc(rwsem_wlock); return ret; @@ -596,6 +606,7 @@ out_nolock: __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); wake_up_q(&wake_q); + lockevent_inc(rwsem_wlock_fail); return ERR_PTR(-EINTR); } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 3059a2dc39f8e..37db17890e36a 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -23,6 +23,8 @@ * is involved. Ideally we would like to track all the readers that own * a rwsem, but the overhead is simply too big. */ +#include "lock_events.h" + #define RWSEM_READER_OWNED (1UL << 0) #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) @@ -200,6 +202,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) */ long tmp = RWSEM_UNLOCKED_VALUE; + lockevent_inc(rwsem_rtrylock); do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { @@ -241,6 +244,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; + lockevent_inc(rwsem_wtrylock); tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (tmp == RWSEM_UNLOCKED_VALUE) { -- GitLab From 364f784f048c984721986db90c95ca8350213c91 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:20 -0400 Subject: [PATCH 0892/1861] locking/rwsem: Optimize rwsem structure for uncontended lock acquisition For an uncontended rwsem, count and owner are the only fields a task needs to touch when acquiring the rwsem. So they are put next to each other to increase the chance that they will share the same cacheline. On a ThunderX2 99xx (arm64) system with 32K L1 cache and 256K L2 cache, a rwsem locking microbenchmark with one locking thread was run to write-lock and write-unlock an array of rwsems separated 2 cachelines apart in a 1M byte memory block. The locking rates (kops/s) of the microbenchmark when the rwsems are at various "long" (8-byte) offsets from beginning of the cacheline before and after the patch were as follows: Cacheline Offset Pre-patch Post-patch ---------------- --------- ---------- 0 17,449 16,588 1 17,450 16,465 2 17,450 16,460 3 17,453 16,462 4 14,867 16,471 5 14,867 16,470 6 14,853 16,464 7 14,867 13,172 Before the patch, the count and owner are 4 "long"s apart. After the patch, they are only 1 "long" apart. The rwsem data have to be loaded from the L3 cache for each access. It can be seen that the locking rates are more consistent after the patch than before. Note that for this particular system, the performance drop happens whenever the count and owner are at an odd multiples of "long"s apart. No performance drop was observed when only a single rwsem was used (hot cache). So the drop is likely just an idiosyncrasy of the cache architecture of this chip than an inherent problem with the patch. Suggested-by: Linus Torvalds Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-12-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index b44e533235c73..2ea18a3def045 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -20,21 +20,30 @@ #include #endif -struct rw_semaphore; - -/* All arch specific implementations share the same struct */ +/* + * For an uncontended rwsem, count and owner are the only fields a task + * needs to touch when acquiring the rwsem. So they are put next to each + * other to increase the chance that they will share the same cacheline. + * + * In a contended rwsem, the owner is likely the most frequently accessed + * field in the structure as the optimistic waiter that holds the osq lock + * will spin on owner. For an embedded rwsem, other hot fields in the + * containing structure should be moved further away from the rwsem to + * reduce the chance that they will share the same cacheline causing + * cacheline bouncing problem. + */ struct rw_semaphore { atomic_long_t count; - struct list_head wait_list; - raw_spinlock_t wait_lock; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER - struct optimistic_spin_queue osq; /* spinner MCS lock */ /* * Write owner. Used as a speculative check to see * if the owner is running on the cpu. */ struct task_struct *owner; + struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif + raw_spinlock_t wait_lock; + struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif -- GitLab From 9752c37cc89f43675e70cf9acff23519fa84b48c Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Tue, 9 Apr 2019 18:59:59 +0200 Subject: [PATCH 0893/1861] i3c: Fix the verification of random PID The validation of random PID should be done by checking the boardinfo->pid instead of info.pid which is empty. Doing the change the info struture declaration is no longer necessary. Cc: Boris Brezillon Cc: Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Vitor Soares Signed-off-by: Boris Brezillon --- drivers/i3c/master.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 2dc628d4f1aee..1412abcff0109 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1980,7 +1980,6 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, { struct i3c_dev_boardinfo *boardinfo; struct device *dev = &master->dev; - struct i3c_device_info info = { }; enum i3c_addr_slot_status addrstatus; u32 init_dyn_addr = 0; @@ -2012,8 +2011,8 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, boardinfo->pid = ((u64)reg[1] << 32) | reg[2]; - if ((info.pid & GENMASK_ULL(63, 48)) || - I3C_PID_RND_LOWER_32BITS(info.pid)) + if ((boardinfo->pid & GENMASK_ULL(63, 48)) || + I3C_PID_RND_LOWER_32BITS(boardinfo->pid)) return -EINVAL; boardinfo->init_dyn_addr = init_dyn_addr; -- GitLab From 907621e94d49b85cd76f13110eceb940a182c69e Mon Sep 17 00:00:00 2001 From: Vitor Soares Date: Mon, 8 Apr 2019 13:13:34 +0200 Subject: [PATCH 0894/1861] i3c: dw: Fix dw_i3c_master_disable controller by using correct mask The controller was being disabled incorrectly. The correct way is to clear the DEV_CTRL_ENABLE bit. Fix this by clearing this bit. 
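To see the bug class in isolation: a read-modify-write that ANDs with the flag instead of its complement throws away every other bit and leaves the flag itself set. A minimal stand-alone sketch (the bit position and register value are assumed here purely for illustration; only the masking pattern mirrors the fix above):

    #include <stdio.h>
    #include <stdint.h>

    #define DEV_CTRL_ENABLE (1u << 31)      /* assumed bit, illustration only */

    int main(void)
    {
        uint32_t reg = 0x800000ffu;         /* hypothetical DEVICE_CTRL value */

        /* Buggy pattern: keeps ONLY the enable bit, so it stays set. */
        printf("buggy: 0x%08x\n", reg & DEV_CTRL_ENABLE);

        /* Fixed pattern: clears the enable bit, preserves the rest. */
        printf("fixed: 0x%08x\n", reg & ~DEV_CTRL_ENABLE);
        return 0;
    }

The buggy form prints 0x80000000 (controller still enabled, all other control bits lost); the fixed form prints 0x000000ff.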
Cc: Boris Brezillon Cc: Fixes: 1dd728f5d4d4 ("i3c: master: Add driver for Synopsys DesignWare IP") Signed-off-by: Vitor Soares Signed-off-by: Boris Brezillon --- drivers/i3c/master/dw-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 59279224e07fc..10c26ffaa8eff 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -300,7 +300,7 @@ to_dw_i3c_master(struct i3c_master_controller *master) static void dw_i3c_master_disable(struct dw_i3c_master *master) { - writel(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_ENABLE, + writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_ENABLE, master->regs + DEVICE_CTRL); } -- GitLab From 709a53e1948494cc4f6c4636c6f84a4d36a8117e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 30 Mar 2019 09:02:14 +0100 Subject: [PATCH 0895/1861] MAINTAINERS: Fix the I3C entry There's no include/dt-bindings/i3c/ directory, remove this F: entry from the I3C file patterns. Cc: Greg Kroah-Hartman Cc: Joe Perches Reported-by: Joe Perches Fixes: 4f26d0666961 ("MAINTAINERS: Add myself as the I3C subsystem maintainer") Signed-off-by: Boris Brezillon --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41e..30d3010c8825e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7333,7 +7333,6 @@ F: Documentation/devicetree/bindings/i3c/ F: Documentation/driver-api/i3c F: drivers/i3c/ F: include/linux/i3c/ -F: include/dt-bindings/i3c/ I3C DRIVER FOR SYNOPSYS DESIGNWARE M: Vitor Soares -- GitLab From 3966c3feca3fd10b2935caa0b4a08c7dd59469e5 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 2 Apr 2019 15:21:18 +0000 Subject: [PATCH 0896/1861] x86/perf/amd: Remove need to check "running" bit in NMI handler Spurious interrupt support was added to perf in the following commit, almost a decade ago: 63e6be6d98e1 ("perf, x86: Catch spurious interrupts after disabling counters") The two previous patches (resolving the race condition when disabling a PMC and NMI latency mitigation) allow for the removal of this older spurious interrupt support. Currently in x86_pmu_stop(), the bit for the PMC in the active_mask bitmap is cleared before disabling the PMC, which sets up a race condition. This race condition was mitigated by introducing the running bitmap. That race condition can be eliminated by first disabling the PMC, waiting for PMC reset on overflow and then clearing the bit for the PMC in the active_mask bitmap. The NMI handler will not re-enable a disabled counter. If x86_pmu_stop() is called from the perf NMI handler, the NMI latency mitigation support will guard against any unhandled NMI messages. 
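The ordering change can be modeled in a few lines. A toy user-space sketch of the two orderings (illustrative names only, not the kernel code; the real disable path additionally waits for a pending overflow to be reset):

    #include <stdbool.h>

    static bool active;     /* stands in for the active_mask bit */
    static bool enabled;    /* stands in for the PMC enable bit  */

    static void pmc_disable(void) { enabled = false; }

    /* Old order: the bit is cleared while the PMC can still overflow,
     * so an NMI in the window sees !active yet a live counter. */
    static void stop_old(void) { active = false; pmc_disable(); }

    /* New order: disable first, then clear the bit; the NMI handler
     * never observes an inconsistent (inactive, enabled) pair. */
    static void stop_new(void) { pmc_disable(); active = false; }

    int main(void)
    {
        active = enabled = true;
        stop_new();
        return active || enabled;   /* 0: both torn down consistently */
    }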
Signed-off-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Cc: # 4.14.x- Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 21 +++++++++++++++++++-- arch/x86/events/core.c | 13 +++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 34c191453ce33..0ecfac84ba911 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -4,8 +4,8 @@ #include #include #include -#include #include +#include #include "../perf_event.h" @@ -491,6 +491,23 @@ static void amd_pmu_disable_all(void) } } +static void amd_pmu_disable_event(struct perf_event *event) +{ + x86_pmu_disable_event(event); + + /* + * This can be called from NMI context (via x86_pmu_stop). The counter + * may have overflowed, but either way, we'll never see it get reset + * by the NMI if we're already in the NMI. And the NMI latency support + * below will take care of any pending NMI that might have been + * generated by the overflow. + */ + if (in_nmi()) + return; + + amd_pmu_wait_on_overflow(event->hw.idx); +} + /* * Because of NMI latency, if multiple PMC counters are active or other sources * of NMIs are received, the perf NMI handler can handle one or more overflowed @@ -738,7 +755,7 @@ static __initconst const struct x86_pmu amd_pmu = { .disable_all = amd_pmu_disable_all, .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, + .disable = amd_pmu_disable_event, .hw_config = amd_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e2b1447192a88..81911e11a15df 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1349,8 +1349,9 @@ void x86_pmu_stop(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + if (test_bit(hwc->idx, cpuc->active_mask)) { x86_pmu.disable(event); + __clear_bit(hwc->idx, cpuc->active_mask); cpuc->events[hwc->idx] = NULL; WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; @@ -1447,16 +1448,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs) apic_write(APIC_LVTPC, APIC_DM_NMI); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) { - /* - * Though we deactivated the counter some cpus - * might still deliver spurious interrupts still - * in flight. Catch them: - */ - if (__test_and_clear_bit(idx, cpuc->running)) - handled++; + if (!test_bit(idx, cpuc->active_mask)) continue; - } event = cpuc->events[idx]; -- GitLab From d6ba3f815bc5f3c4249d15c8bc5fbb012651b4a4 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 8 Apr 2019 17:08:58 +0800 Subject: [PATCH 0897/1861] ASoC: Intel: kbl: fix wrong number of channels Fix wrong setting on number of channels. The context wants to set constraint to 2 channels instead of 4. 
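For context, this is how such a channel constraint is wired up in ASoC machine drivers: the list enumerates the channel counts user space may negotiate, so listing 4 where the link is meant to expose stereo DMIC silently forbids 2-channel streams. A condensed sketch using the real ALSA constraint API (surrounding driver glue omitted):

    static const unsigned int dmic_2ch[] = { 2 };

    static const struct snd_pcm_hw_constraint_list constraints_dmic_2ch = {
        .count = ARRAY_SIZE(dmic_2ch),
        .list = dmic_2ch,
    };

    /* from the DAI startup callback: restrict CHANNELS to the list */
    snd_pcm_hw_constraint_list(substream->runtime, 0,
                               SNDRV_PCM_HW_PARAM_CHANNELS,
                               &constraints_dmic_2ch);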
Signed-off-by: Tzung-Bi Shih Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 7044d8c2b1873..879f14257a3ea 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -405,7 +405,7 @@ static const struct snd_pcm_hw_constraint_list constraints_dmic_channels = { }; static const unsigned int dmic_2ch[] = { - 4, + 2, }; static const struct snd_pcm_hw_constraint_list constraints_dmic_2ch = { -- GitLab From e37c2deafe7058cf7989c4c47bbf1140cc867d89 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 10 Apr 2019 10:08:36 +0200 Subject: [PATCH 0898/1861] ASoC: stm32: sai: fix master clock management When master clock is used, master clock rate is set exclusively. Parent clocks of master clock cannot be changed after a call to clk_set_rate_exclusive(). So the parent clock of SAI kernel clock must be set before. Ensure also that exclusive rate operations are balanced in STM32 SAI driver. Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 64 +++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 83d8a7ac56f42..d7045aa520de5 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -70,6 +70,7 @@ #define SAI_IEC60958_STATUS_BYTES 24 #define SAI_MCLK_NAME_LEN 32 +#define SAI_RATE_11K 11025 /** * struct stm32_sai_sub_data - private data of SAI sub block (block A or B) @@ -309,6 +310,25 @@ static int stm32_sai_set_clk_div(struct stm32_sai_sub_data *sai, return ret; } +static int stm32_sai_set_parent_clock(struct stm32_sai_sub_data *sai, + unsigned int rate) +{ + struct platform_device *pdev = sai->pdev; + struct clk *parent_clk = sai->pdata->clk_x8k; + int ret; + + if (!(rate % SAI_RATE_11K)) + parent_clk = sai->pdata->clk_x11k; + + ret = clk_set_parent(sai->sai_ck, parent_clk); + if (ret) + dev_err(&pdev->dev, " Error %d setting sai_ck parent clock. %s", + ret, ret == -EBUSY ? + "Active stream rates conflict\n" : "\n"); + + return ret; +} + static long stm32_sai_mclk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) { @@ -490,25 +510,29 @@ static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai, struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); int ret; - if (dir == SND_SOC_CLOCK_OUT) { + if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) { ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, (unsigned int)~SAI_XCR1_NODIV); if (ret < 0) return ret; - dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); - sai->mclk_rate = freq; + /* If master clock is used, set parent clock now */ + ret = stm32_sai_set_parent_clock(sai, freq); + if (ret) + return ret; - if (sai->sai_mclk) { - ret = clk_set_rate_exclusive(sai->sai_mclk, - sai->mclk_rate); - if (ret) { - dev_err(cpu_dai->dev, - "Could not set mclk rate\n"); - return ret; - } + ret = clk_set_rate_exclusive(sai->sai_mclk, freq); + if (ret) { + dev_err(cpu_dai->dev, + ret == -EBUSY ? 
+ "Active streams have incompatible rates" : + "Could not set mclk rate\n"); + return ret; } + + dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); + sai->mclk_rate = freq; } return 0; @@ -916,11 +940,13 @@ static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, int div = 0, cr1 = 0; int sai_clk_rate, mclk_ratio, den; unsigned int rate = params_rate(params); + int ret; - if (!(rate % 11025)) - clk_set_parent(sai->sai_ck, sai->pdata->clk_x11k); - else - clk_set_parent(sai->sai_ck, sai->pdata->clk_x8k); + if (!sai->sai_mclk) { + ret = stm32_sai_set_parent_clock(sai, rate); + if (ret) + return ret; + } sai_clk_rate = clk_get_rate(sai->sai_ck); if (STM_SAI_IS_F4(sai->pdata)) { @@ -1079,9 +1105,13 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV, SAI_XCR1_NODIV); - clk_disable_unprepare(sai->sai_ck); + /* Release mclk rate only if rate was actually set */ + if (sai->mclk_rate) { + clk_rate_exclusive_put(sai->sai_mclk); + sai->mclk_rate = 0; + } - clk_rate_exclusive_put(sai->sai_mclk); + clk_disable_unprepare(sai->sai_ck); spin_lock_irqsave(&sai->irq_lock, flags); sai->substream = NULL; -- GitLab From e33c1b9923775d17ad246946fe67fcb9be288677 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Apr 2019 09:07:06 -0700 Subject: [PATCH 0899/1861] apparmor: Restore Y/N in /sys for apparmor's "enabled" Before commit c5459b829b71 ("LSM: Plumb visibility into optional "enabled" state"), /sys/module/apparmor/parameters/enabled would show "Y" or "N" since it was using the "bool" handler. After being changed to "int", this switched to "1" or "0", breaking the userspace AppArmor detection of dbus-broker. This restores the Y/N output while keeping the LSM infrastructure happy. Before: $ cat /sys/module/apparmor/parameters/enabled 1 After: $ cat /sys/module/apparmor/parameters/enabled Y Reported-by: David Rheinsberg Reviewed-by: David Rheinsberg Link: https://lkml.kernel.org/r/CADyDSO6k8vYb1eryT4g6+EHrLCvb68GAbHVWuULkYjcZcYNhhw@mail.gmail.com Fixes: c5459b829b71 ("LSM: Plumb visibility into optional "enabled" state") Signed-off-by: Kees Cook Signed-off-by: John Johansen --- security/apparmor/lsm.c | 49 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 49d664ddff444..87500bde5a92d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1336,9 +1336,16 @@ module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR); bool aa_g_paranoid_load = true; module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO); +static int param_get_aaintbool(char *buffer, const struct kernel_param *kp); +static int param_set_aaintbool(const char *val, const struct kernel_param *kp); +#define param_check_aaintbool param_check_int +static const struct kernel_param_ops param_ops_aaintbool = { + .set = param_set_aaintbool, + .get = param_get_aaintbool +}; /* Boot time disable flag */ static int apparmor_enabled __lsm_ro_after_init = 1; -module_param_named(enabled, apparmor_enabled, int, 0444); +module_param_named(enabled, apparmor_enabled, aaintbool, 0444); static int __init apparmor_enabled_setup(char *str) { @@ -1413,6 +1420,46 @@ static int param_get_aauint(char *buffer, const struct kernel_param *kp) return param_get_uint(buffer, kp); } +/* Can only be set before AppArmor is initialized (i.e. on boot cmdline). 
*/ +static int param_set_aaintbool(const char *val, const struct kernel_param *kp) +{ + struct kernel_param kp_local; + bool value; + int error; + + if (apparmor_initialized) + return -EPERM; + + /* Create local copy, with arg pointing to bool type. */ + value = !!*((int *)kp->arg); + memcpy(&kp_local, kp, sizeof(kp_local)); + kp_local.arg = &value; + + error = param_set_bool(val, &kp_local); + if (!error) + *((int *)kp->arg) = *((bool *)kp_local.arg); + return error; +} + +/* + * To avoid changing /sys/module/apparmor/parameters/enabled from Y/N to + * 1/0, this converts the "int that is actually bool" back to bool for + * display in the /sys filesystem, while keeping it "int" for the LSM + * infrastructure. + */ +static int param_get_aaintbool(char *buffer, const struct kernel_param *kp) +{ + struct kernel_param kp_local; + bool value; + + /* Create local copy, with arg pointing to bool type. */ + value = !!*((int *)kp->arg); + memcpy(&kp_local, kp, sizeof(kp_local)); + kp_local.arg = &value; + + return param_get_bool(buffer, &kp_local); +} + static int param_get_audit(char *buffer, const struct kernel_param *kp) { if (!apparmor_enabled) -- GitLab From 70802487bb9145a4f8b26f5a11d0e7f83c25100a Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Mon, 8 Apr 2019 12:30:25 -0700 Subject: [PATCH 0900/1861] ASoC: pcm: fix error handling when try_module_get() fails. Handle error before returning when try_module_get() fails to prevent inconsistent mutex lock/unlock. Fixes: 52034add7 (ASoC: pcm: update module refcount if module_get_upon_open is set) Signed-off-by: Ranjani Sridharan Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index d21247546f7f7..be80a12fba27c 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -518,8 +518,10 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) continue; if (component->driver->module_get_upon_open && - !try_module_get(component->dev->driver->owner)) - return -ENODEV; + !try_module_get(component->dev->driver->owner)) { + ret = -ENODEV; + goto module_err; + } ret = component->driver->ops->open(substream); if (ret < 0) { @@ -636,7 +638,7 @@ codec_dai_err: component_err: soc_pcm_components_close(substream, component); - +module_err: if (cpu_dai->driver->ops->shutdown) cpu_dai->driver->ops->shutdown(substream, cpu_dai); out: -- GitLab From 90c1cba2b3b3851c151229f61801919b2904d437 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Apr 2019 16:35:52 -0700 Subject: [PATCH 0901/1861] locking/lockdep: Zap lock classes even with lock debugging disabled The following commit: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use") changed the behavior of lockdep_free_key_range() from unconditionally zapping lock classes into only zapping lock classes if debug_lock == true. Not zapping lock classes if debug_lock == false leaves dangling pointers in several lockdep datastructures, e.g. lock_class::name in the all_lock_classes list. The shell command "cat /proc/lockdep" causes the kernel to iterate the all_lock_classes list. Hence the "unable to handle kernel paging request" cash that Shenghui encountered by running cat /proc/lockdep. Since the new behavior can cause cat /proc/lockdep to crash, restore the pre-v5.1 behavior. 
This patch avoids that cat /proc/lockdep triggers the following crash with debug_lock == false: BUG: unable to handle kernel paging request at fffffbfff40ca448 RIP: 0010:__asan_load1+0x28/0x50 Call Trace: string+0xac/0x180 vsnprintf+0x23e/0x820 seq_vprintf+0x82/0xc0 seq_printf+0x92/0xb0 print_name+0x34/0xb0 l_show+0x184/0x200 seq_read+0x59e/0x6c0 proc_reg_read+0x11f/0x170 __vfs_read+0x4d/0x90 vfs_read+0xc5/0x1f0 ksys_read+0xab/0x130 __x64_sys_read+0x43/0x50 do_syscall_64+0x71/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: shenghui Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Fixes: a0b0fd53e1e6 ("locking/lockdep: Free lock classes that are no longer in use") # v5.1-rc1. Link: https://lkml.kernel.org/r/20190403233552.124673-1-bvanassche@acm.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 34cdcbedda492..e16766ff184b5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4689,8 +4689,8 @@ static void free_zapped_rcu(struct rcu_head *ch) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; + arch_spin_lock(&lockdep_lock); + current->lockdep_recursion = 1; /* closed head */ pf = delayed_free.pf + (delayed_free.index ^ 1); @@ -4702,8 +4702,8 @@ static void free_zapped_rcu(struct rcu_head *ch) */ call_rcu_zapped(delayed_free.pf + delayed_free.index); - graph_unlock(); -out_irq: + current->lockdep_recursion = 0; + arch_spin_unlock(&lockdep_lock); raw_local_irq_restore(flags); } @@ -4744,21 +4744,17 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size) { struct pending_free *pf; unsigned long flags; - int locked; init_data_structures_once(); raw_local_irq_save(flags); - locked = graph_lock(); - if (!locked) - goto out_irq; - + arch_spin_lock(&lockdep_lock); + current->lockdep_recursion = 1; pf = get_pending_free(); __lockdep_free_key_range(pf, start, size); call_rcu_zapped(pf); - - graph_unlock(); -out_irq: + current->lockdep_recursion = 0; + arch_spin_unlock(&lockdep_lock); raw_local_irq_restore(flags); /* @@ -4911,9 +4907,8 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - if (!graph_lock()) - goto out_irq; - + arch_spin_lock(&lockdep_lock); + current->lockdep_recursion = 1; pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { @@ -4925,8 +4920,8 @@ void lockdep_unregister_key(struct lock_class_key *key) WARN_ON_ONCE(!found); __lockdep_free_key_range(pf, key, 1); call_rcu_zapped(pf); - graph_unlock(); -out_irq: + current->lockdep_recursion = 0; + arch_spin_unlock(&lockdep_lock); raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ -- GitLab From e9f33a8fee53c2d4bcdeec9a89478b4bf17bfbbc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Apr 2019 14:11:20 +0200 Subject: [PATCH 0902/1861] mac80211: fix RX STBC override byte order The original patch neglected to take byte order conversions into account, fix that. 
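The rule being applied is generic: a __le16 field must only be tested against a little-endian mask, and must be converted to CPU order before its bits are merged into a host-order word. As a hedged standalone sketch (plain C, with an invented stand-in for the kernel's le16_to_cpu()):

    #include <stdio.h>
    #include <stdint.h>

    #define HT_CAP_RX_STBC 0x0300u  /* host-order mask, as in the header */

    /* minimal stand-in for the kernel's le16_to_cpu() */
    static uint16_t le16_to_cpu_sketch(const uint8_t b[2])
    {
        return (uint16_t)(b[0] | (b[1] << 8));
    }

    int main(void)
    {
        uint8_t cap_info[2] = { 0x00, 0x03 }; /* __le16 with RX STBC bits set */
        /* convert first, then mask; masking the raw storage directly
         * would pick the wrong bits on a big-endian CPU */
        printf("rx_stbc: %#x\n", le16_to_cpu_sketch(cap_info) & HT_CAP_RX_STBC);
        return 0;
    }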
Fixes: d9bb410888ce ("mac80211: allow overriding HT STBC capabilities") Signed-off-by: Johannes Berg Reviewed-by: Sergey Matyukevich Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e03c46ac8e4d8..c62101857b9b9 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -112,8 +112,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_TX_STBC); /* Allow user to configure RX STBC bits */ - if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC) - ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC; + if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) & + IEEE80211_HT_CAP_RX_STBC; /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & -- GitLab From 07d7e12091f4ab869cc6a4bb276399057e73b0b3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 7 Apr 2019 21:15:42 -0700 Subject: [PATCH 0903/1861] alarmtimer: Return correct remaining time To calculate the remaining time, the current time must be subtracted from the expiration time. In alarm_timer_remaining() the arguments of ktime_sub are swapped. Fixes: d653d8457c76 ("alarmtimer: Implement remaining callback") Signed-off-by: Andrei Vagin Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Cc: Stephen Boyd Cc: John Stultz Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190408041542.26338-1-avagin@gmail.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 2c97e8c2d29fb..0519a8805aab3 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -594,7 +594,7 @@ static ktime_t alarm_timer_remaining(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; - return ktime_sub(now, alarm->node.expires); + return ktime_sub(alarm->node.expires, now); } /** -- GitLab From d7a181da2dfa3190487c446042ba01e07d851c74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Apr 2019 12:49:55 +0200 Subject: [PATCH 0904/1861] ALSA: hda: Fix racy display power access snd_hdac_display_power() doesn't handle concurrent calls carefully enough, which may lead to double get_power or put_power calls when a runtime PM callback and an async work race with each other. This patch addresses it by reusing the bus->lock mutex that has already been used for protecting the link state change in the ext bus code, so that it can protect against racy display state changes. The initialization of bus->lock was moved from snd_hdac_ext_bus_init() to snd_hdac_bus_init() accordingly.
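The invariant the mutex restores can be modelled in userspace pthread C (a rough sketch, not the driver code): status changes and the resulting power transitions happen atomically, so the get/put callbacks run exactly once per edge:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long display_power_status; /* bitmask of requesters */
    static bool display_power_active;          /* get_power() issued? */

    void display_power(unsigned int idx, bool enable)
    {
        pthread_mutex_lock(&lock);
        if (enable)
            display_power_status |= 1UL << idx;
        else
            display_power_status &= ~(1UL << idx);

        if (display_power_status && !display_power_active) {
            /* ops->get_power() would go here, exactly once */
            display_power_active = true;
        } else if (!display_power_status && display_power_active) {
            /* ops->put_power() would go here, exactly once */
            display_power_active = false;
        }
        pthread_mutex_unlock(&lock);
    }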
Testcase: igt/i915_pm_rpm/module-reload #glk-dsi Reported-by: Chris Wilson Reviewed-by: Chris Wilson Cc: Imre Deak Signed-off-by: Takashi Iwai --- sound/hda/ext/hdac_ext_bus.c | 1 - sound/hda/hdac_bus.c | 1 + sound/hda/hdac_component.c | 6 +++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c index 9c37d9af3023f..ec7715c6b0c02 100644 --- a/sound/hda/ext/hdac_ext_bus.c +++ b/sound/hda/ext/hdac_ext_bus.c @@ -107,7 +107,6 @@ int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev, INIT_LIST_HEAD(&bus->hlink_list); bus->idx = idx++; - mutex_init(&bus->lock); bus->cmd_dma_state = true; return 0; diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c index 012305177f682..ad8eee08013fb 100644 --- a/sound/hda/hdac_bus.c +++ b/sound/hda/hdac_bus.c @@ -38,6 +38,7 @@ int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev, INIT_WORK(&bus->unsol_work, snd_hdac_bus_process_unsol_events); spin_lock_init(&bus->reg_lock); mutex_init(&bus->cmd_mutex); + mutex_init(&bus->lock); bus->irq = -1; return 0; } diff --git a/sound/hda/hdac_component.c b/sound/hda/hdac_component.c index a6d37b9d6413f..6b5caee61c6e0 100644 --- a/sound/hda/hdac_component.c +++ b/sound/hda/hdac_component.c @@ -69,13 +69,15 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable) dev_dbg(bus->dev, "display power %s\n", enable ? "enable" : "disable"); + + mutex_lock(&bus->lock); if (enable) set_bit(idx, &bus->display_power_status); else clear_bit(idx, &bus->display_power_status); if (!acomp || !acomp->ops) - return; + goto unlock; if (bus->display_power_status) { if (!bus->display_power_active) { @@ -92,6 +94,8 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable) bus->display_power_active = false; } } + unlock: + mutex_unlock(&bus->lock); } EXPORT_SYMBOL_GPL(snd_hdac_display_power); -- GitLab From eed47d19d9362bdd958e4ab56af480b9dbf6b2b6 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Apr 2019 10:38:33 +0200 Subject: [PATCH 0905/1861] block, bfq: fix use after free in bfq_bfqq_expire The function bfq_bfqq_expire() invokes the function __bfq_bfqq_expire(), and the latter may free the in-service bfq-queue. If this happens, then no other instruction of bfq_bfqq_expire() must be executed, or a use-after-free will occur. Based on the assumption that __bfq_bfqq_expire() invokes bfq_put_queue() on the in-service bfq-queue exactly once, the queue is assumed to be freed if its refcounter is equal to one right before invoking __bfq_bfqq_expire(). But since commit 9dee8b3b057e ("block, bfq: fix queue removal from weights tree"), this assumption no longer holds: __bfq_bfqq_expire() may also invoke bfq_weights_tree_remove(), which in turn may invoke bfq_put_queue(). So __bfq_bfqq_expire() may invoke bfq_put_queue() twice, and this is the actual case where the in-service queue may happen to be freed. To address this issue, this commit moves the check on the refcounter of the queue right around the last bfq_put_queue() that may be invoked on the queue.
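The idiom is worth spelling out, since it is easy to get wrong; a minimal self-contained sketch with invented names:

    #include <stdbool.h>
    #include <stdlib.h>

    struct obj { int ref; };

    static void obj_put(struct obj *o)
    {
        if (--o->ref == 0)
            free(o);
    }

    /* Sample the refcount *before* the put that may be the last one;
     * if it was 1, the object is gone and must not be touched again. */
    static bool obj_put_and_report_freed(struct obj *o)
    {
        int ref = o->ref;

        obj_put(o);
        return ref == 1;
    }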
Fixes: 9dee8b3b057e ("block, bfq: fix queue removal from weights tree") Reported-by: Dmitrii Tcvetkov Reported-by: Douglas Anderson Tested-by: Dmitrii Tcvetkov Tested-by: Douglas Anderson Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 15 +++++++-------- block/bfq-iosched.h | 2 +- block/bfq-wf2q.c | 17 +++++++++++++++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index fac188dd78fa7..dfb8cb0af13a8 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2822,7 +2822,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) bfq_remove_request(q, rq); } -static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* * If this bfqq is shared between multiple processes, check @@ -2855,9 +2855,11 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* * All in-service entities must have been properly deactivated * or requeued before executing the next function, which - * resets all in-service entites as no more in service. + * resets all in-service entities as no more in service. This + * may cause bfqq to be freed. If this happens, the next + * function returns true. */ - __bfq_bfqd_reset_in_service(bfqd); + return __bfq_bfqd_reset_in_service(bfqd); } /** @@ -3262,7 +3264,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bool slow; unsigned long delta = 0; struct bfq_entity *entity = &bfqq->entity; - int ref; /* * Check whether the process is slow (see bfq_bfqq_is_slow). @@ -3347,10 +3348,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * reason. */ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); - ref = bfqq->ref; - __bfq_bfqq_expire(bfqd, bfqq); - - if (ref == 1) /* bfqq is gone, no more actions on it */ + if (__bfq_bfqq_expire(bfqd, bfqq)) + /* bfqq is gone, no more actions on it */ return; bfqq->injected_service = 0; diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 062e1c4787f4a..86394e503ca9c 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -995,7 +995,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree); bool next_queue_may_preempt(struct bfq_data *bfqd); struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd); -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd); void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool ins_into_idle_tree, bool expiration); void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index a11bef75483d4..ae4d000ac0af1 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1605,7 +1605,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) return bfqq; } -void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) +/* returns true if the in-service queue gets freed */ +bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) { struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue; struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity; @@ -1629,8 +1630,20 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). 
*/ - if (!in_serv_entity->on_st) + if (!in_serv_entity->on_st) { + /* + * If no process is referencing in_serv_bfqq any + * longer, then the service reference may be the only + * reference to the queue. If this is the case, then + * bfqq gets freed here. + */ + int ref = in_serv_bfqq->ref; bfq_put_queue(in_serv_bfqq); + if (ref == 1) + return true; + } + + return false; } void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -- GitLab From bf348f9b78d413e75bb079462751a1d86b6de36c Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 27 Mar 2019 18:36:34 +0800 Subject: [PATCH 0906/1861] virtio-blk: limit number of hw queues by nr_cpu_ids When tag_set->nr_maps is 1, the block layer limits the number of hw queues by nr_cpu_ids. No matter how many hw queues are used by virtio-blk, as it has (tag_set->nr_maps == 1), it can use at most nr_cpu_ids hw queues. In addition, specifically for pci scenario, when the 'num-queues' specified by qemu is more than maxcpus, virtio-blk would not be able to allocate more than maxcpus vectors in order to have a vector for each queue. As a result, it falls back into MSI-X with one vector for config and one shared for queues. Considering above reasons, this patch limits the number of hw queues used by virtio-blk by nr_cpu_ids. Reviewed-by: Stefan Hajnoczi Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4bc083b7c9b54..2a7ca4a1e6f7b 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -513,6 +513,8 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; + num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) return -ENOMEM; -- GitLab From 1978f30a87732d4d9072a20abeded9fe17884f1b Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 27 Mar 2019 18:36:35 +0800 Subject: [PATCH 0907/1861] scsi: virtio_scsi: limit number of hw queues by nr_cpu_ids When tag_set->nr_maps is 1, the block layer limits the number of hw queues by nr_cpu_ids. No matter how many hw queues are used by virtio-scsi, as it has (tag_set->nr_maps == 1), it can use at most nr_cpu_ids hw queues. In addition, specifically for pci scenario, when the 'num_queues' specified by qemu is more than maxcpus, virtio-scsi would not be able to allocate more than maxcpus vectors in order to have a vector for each queue. As a result, it falls back into MSI-X with one vector for config and one shared for queues. Considering above reasons, this patch limits the number of hw queues used by virtio-scsi by nr_cpu_ids. Reviewed-by: Stefan Hajnoczi Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/scsi/virtio_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8af01777d09c7..f8cb7c23305b7 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -793,6 +793,7 @@ static int virtscsi_probe(struct virtio_device *vdev) /* We need to know how many queues before we allocate. */ num_queues = virtscsi_config_get(vdev, num_queues) ? 
: 1; + num_queues = min_t(unsigned int, nr_cpu_ids, num_queues); num_targets = virtscsi_config_get(vdev, max_target) + 1; -- GitLab From 5712f3301a12c0c3de9cc423484496b0464f2faf Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 3 Apr 2019 09:13:34 +0200 Subject: [PATCH 0908/1861] s390/3270: fix lockdep false positive on view->lock The spinlock in the raw3270_view structure is used by con3270, tty3270 and fs3270 in different ways. For con3270 the lock can be acquired in irq context, for tty3270 and fs3270 the highest context is bh. Lockdep sees the view->lock as a single class and if the 3270 driver is used for the console the following message is generated: WARNING: inconsistent lock state 5.1.0-rc3-05157-g5c168033979d #12 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. swapper/0/1 [HC0[0]:SC1[1]:HE1:SE0] takes: (____ptrval____) (&(&view->lock)->rlock){?.-.}, at: tty3270_update+0x7c/0x330 Introduce a lockdep subclass for the view lock to distinguish bh from irq locks. Signed-off-by: Martin Schwidefsky --- drivers/s390/char/con3270.c | 2 +- drivers/s390/char/fs3270.c | 3 ++- drivers/s390/char/raw3270.c | 3 ++- drivers/s390/char/raw3270.h | 4 +++- drivers/s390/char/tty3270.c | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index fd2146bcc0add..e17364e13d2f7 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -629,7 +629,7 @@ con3270_init(void) (void (*)(unsigned long)) con3270_read_tasklet, (unsigned long) condev->read); - raw3270_add_view(&condev->view, &con3270_fn, 1); + raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ); INIT_LIST_HEAD(&condev->freemem); for (i = 0; i < CON3270_STRING_PAGES; i++) { diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 8f3a2eeb28dca..8b48ba9c598ec 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -463,7 +463,8 @@ fs3270_open(struct inode *inode, struct file *filp) init_waitqueue_head(&fp->wait); fp->fs_pid = get_pid(task_pid(current)); - rc = raw3270_add_view(&fp->view, &fs3270_fn, minor); + rc = raw3270_add_view(&fp->view, &fs3270_fn, minor, + RAW3270_VIEW_LOCK_BH); if (rc) { fs3270_free_view(&fp->view); goto out; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index f8cd2935fbfd4..63a41b1687610 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -920,7 +920,7 @@ raw3270_deactivate_view(struct raw3270_view *view) * Add view to device with minor "minor". 
*/ int -raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) +raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass) { unsigned long flags; struct raw3270 *rp; @@ -942,6 +942,7 @@ raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor) view->cols = rp->cols; view->ascebc = rp->ascebc; spin_lock_init(&view->lock); + lockdep_set_subclass(&view->lock, subclass); list_add(&view->list, &rp->view_list); rc = 0; spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 114ca7cbf8897..3afaa35f73513 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -150,6 +150,8 @@ struct raw3270_fn { struct raw3270_view { struct list_head list; spinlock_t lock; +#define RAW3270_VIEW_LOCK_IRQ 0 +#define RAW3270_VIEW_LOCK_BH 1 atomic_t ref_count; struct raw3270 *dev; struct raw3270_fn *fn; @@ -158,7 +160,7 @@ struct raw3270_view { unsigned char *ascebc; /* ascii -> ebcdic table */ }; -int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int); +int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int); int raw3270_activate_view(struct raw3270_view *); void raw3270_del_view(struct raw3270_view *); void raw3270_deactivate_view(struct raw3270_view *); diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 2b0c36c2c5688..98d7fc152e32f 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -980,7 +980,8 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty) return PTR_ERR(tp); rc = raw3270_add_view(&tp->view, &tty3270_fn, - tty->index + RAW3270_FIRSTMINOR); + tty->index + RAW3270_FIRSTMINOR, + RAW3270_VIEW_LOCK_BH); if (rc) { tty3270_free_view(tp); return rc; -- GitLab From 16222cfb96b02a4a3e38e52012f2a6304850c3c9 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 3 Apr 2019 13:18:22 +0200 Subject: [PATCH 0909/1861] s390/zcrypt: fix possible deadlock situation on ap queue remove With commit 01396a374c3d ("s390/zcrypt: revisit ap device remove procedure") the ap queue remove is now a two-stage process. However, a del_timer_sync() call may trigger the timer function, which may try to take the very same spinlock that is held by the function that just initiated the del_timer_sync() call. This could end up in a deadlock. Very unlikely, but possible, as you need to remove an ap queue at the exact same time that a request timeout occurs.
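The ordering rule the fix follows, sketched in kernel style (a condensed paraphrase of the hunk below, not literal driver code): never call del_timer_sync() while holding a lock that the timer callback itself takes.

    static void timeout_fn(struct timer_list *t)
    {
        spin_lock_bh(&aq->lock);    /* the callback takes aq->lock ... */
        /* ... handle the timed-out request ... */
        spin_unlock_bh(&aq->lock);
    }

    /* in ap_queue_prepare_remove(): */
    spin_lock_bh(&aq->lock);
    /* ... flush the queue, set AP_STATE_REMOVE ... */
    spin_unlock_bh(&aq->lock);      /* drop the lock first ... */
    del_timer_sync(&aq->timeout);   /* ... then wait for the callback,
                                       otherwise timeout_fn() can deadlock */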
Signed-off-by: Harald Freudenberger Reported-by: Pierre Morel Fixes: 01396a374c3d ("s390/zcrypt: revisit ap device remove procedure") Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 6a340f2c35569..5ea83dc4f1d74 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -751,8 +751,8 @@ void ap_queue_prepare_remove(struct ap_queue *aq) __ap_flush_queue(aq); /* set REMOVE state to prevent new messages are queued in */ aq->state = AP_STATE_REMOVE; - del_timer_sync(&aq->timeout); spin_unlock_bh(&aq->lock); + del_timer_sync(&aq->timeout); } void ap_queue_remove(struct ap_queue *aq) -- GitLab From bf9921a9c15bad089c08b94c300a6cafa035a612 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 1 Apr 2019 19:10:45 +0200 Subject: [PATCH 0910/1861] s390: introduce .boot.preserved.data section Introduce the .boot.preserved.data section, which is similar to .boot.data and "shared" between the decompressor code and the decompressed kernel. The decompressor will store values in it and copy them over to the decompressed image before starting it. This method avoids using pre-defined addresses and other hacks to pass values between those boot phases. Unlike the .boot.data section, .boot.preserved.data is NOT part of the init data and hence will be preserved for the kernel's lifetime. Signed-off-by: Gerald Schaefer Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/decompressor.h | 2 ++ arch/s390/boot/compressed/vmlinux.lds.S | 1 + arch/s390/boot/startup.c | 4 ++++ arch/s390/include/asm/sections.h | 7 +++++++ arch/s390/include/asm/vmlinux.lds.h | 13 +++++++++++++ arch/s390/kernel/vmlinux.lds.S | 4 ++++ 6 files changed, 31 insertions(+) diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index e1c1f2ec60f4a..424cf524aac16 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -17,6 +17,8 @@ struct vmlinux_info { unsigned long bss_size; /* uncompressed image .bss size */ unsigned long bootdata_off; unsigned long bootdata_size; + unsigned long bootdata_preserved_off; + unsigned long bootdata_preserved_size; }; extern char _vmlinux_info[]; diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 7efc3938f5955..1c8ef393c6569 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -34,6 +34,7 @@ SECTIONS _edata = . 
; } BOOT_DATA + BOOT_DATA_PRESERVED /* * uncompressed image info used by the decompressor it should match diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index bdfc5549a2998..57d7f9446e29e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -6,6 +6,7 @@ #include "boot.h" extern char __boot_data_start[], __boot_data_end[]; +extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; void error(char *x) { @@ -43,6 +44,9 @@ static void copy_bootdata(void) if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size) error(".boot.data section size mismatch"); memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size); + if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size) + error(".boot.preserved.data section size mismatch"); + memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size); } void startup_kernel(void) diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 7afe4620685c9..29e55739b516a 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -16,4 +16,11 @@ */ #define __bootdata(var) __section(.boot.data.var) var +/* + * .boot.preserved.data is similar to .boot.data, but it is not part of the + * .init section and thus will be preserved for later use in the decompressed + * kernel. + */ +#define __bootdata_preserved(var) __section(.boot.preserved.data.var) var + #endif diff --git a/arch/s390/include/asm/vmlinux.lds.h b/arch/s390/include/asm/vmlinux.lds.h index 2d127f900352d..cbe670a6861b0 100644 --- a/arch/s390/include/asm/vmlinux.lds.h +++ b/arch/s390/include/asm/vmlinux.lds.h @@ -18,3 +18,16 @@ *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.data*))) \ __boot_data_end = .; \ } + +/* + * .boot.preserved.data is similar to .boot.data, but it is not part of the + * .init section and thus will be preserved for later use in the decompressed + * kernel. + */ +#define BOOT_DATA_PRESERVED \ + . = ALIGN(PAGE_SIZE); \ + .boot.preserved.data : { \ + __boot_data_preserved_start = .; \ + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.preserved.data*))) \ + __boot_data_preserved_end = .; \ + } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 8429ab0797157..6ef9c62bb01b6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -72,6 +72,7 @@ SECTIONS __end_ro_after_init = .; RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) + BOOT_DATA_PRESERVED _edata = .; /* End of data section */ @@ -161,6 +162,9 @@ SECTIONS QUAD(__bss_stop - __bss_start) /* bss_size */ QUAD(__boot_data_start) /* bootdata_off */ QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */ + QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */ + QUAD(__boot_data_preserved_end - + __boot_data_preserved_start) /* bootdata_preserved_size */ } :NONE /* Debugging sections. */ -- GitLab From 1e941d39493f1820475d80729a03cd7ab8c3c86d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 1 Apr 2019 19:10:51 +0200 Subject: [PATCH 0911/1861] s390: move ipl block to .boot.preserved.data section .boot.preserved.data is a better fit for ipl block than .boot.data which is discarded after init. Reusing .boot.preserved.data allows to simplify code a little bit and avoid copying data from .boot.data to persistent variables. 
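The mechanism itself is ordinary section plumbing and can be modelled in host C; this is a hedged sketch, the real macros and linker-script symbols are in the hunks of this and the previous patch:

    #include <string.h>

    /* place a variable into the shared, named section */
    #define __bootdata_preserved(var) \
        __attribute__((section(".boot.preserved.data"))) var

    int __bootdata_preserved(ipl_valid);

    /* start/end symbols are emitted by the linker script */
    extern char __boot_data_preserved_start[], __boot_data_preserved_end[];

    static void copy_bootdata_preserved(void *dst)
    {
        memcpy(dst, __boot_data_preserved_start,
               __boot_data_preserved_end - __boot_data_preserved_start);
    }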
Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 24 ++++++++++++------------ arch/s390/include/asm/boot_data.h | 4 ++-- arch/s390/include/asm/ipl.h | 1 - arch/s390/kernel/early.c | 1 - arch/s390/kernel/ipl.c | 15 ++------------- 5 files changed, 16 insertions(+), 29 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 36beb56de0217..dc0438d08751f 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -10,8 +10,8 @@ #include "boot.h" char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; -struct ipl_parameter_block __bootdata(early_ipl_block); -int __bootdata(early_ipl_block_valid); +struct ipl_parameter_block __bootdata_preserved(ipl_block); +int __bootdata_preserved(ipl_block_valid); unsigned long __bootdata(memory_end); int __bootdata(memory_end_set); @@ -45,10 +45,10 @@ void store_ipl_parmblock(void) { int rc; - rc = __diag308(DIAG308_STORE, &early_ipl_block); + rc = __diag308(DIAG308_STORE, &ipl_block); if (rc == DIAG308_RC_OK && - early_ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) - early_ipl_block_valid = 1; + ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) + ipl_block_valid = 1; } static size_t scpdata_length(const char *buf, size_t count) @@ -103,14 +103,14 @@ static void append_ipl_block_parm(void) delim = early_command_line + len; /* '\0' character position */ parm = early_command_line + len + 1; /* append right after '\0' */ - switch (early_ipl_block.hdr.pbt) { + switch (ipl_block.hdr.pbt) { case DIAG308_IPL_TYPE_CCW: rc = ipl_block_get_ascii_vmparm( - parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block); + parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; case DIAG308_IPL_TYPE_FCP: rc = ipl_block_get_ascii_scpdata( - parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block); + parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; } if (rc) { @@ -141,7 +141,7 @@ void setup_boot_command_line(void) strcpy(early_command_line, strim(COMMAND_LINE)); /* append IPL PARM data to the boot command line */ - if (early_ipl_block_valid) + if (ipl_block_valid) append_ipl_block_parm(); } @@ -234,9 +234,9 @@ void parse_boot_command_line(void) void setup_memory_end(void) { #ifdef CONFIG_CRASH_DUMP - if (!OLDMEM_BASE && early_ipl_block_valid && - early_ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP && - early_ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) { + if (!OLDMEM_BASE && ipl_block_valid && + ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP && + ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) { if (!sclp_early_get_hsa_size(&memory_end) && memory_end) memory_end_set = 1; } diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index 2d999ccb977a7..d794802a22917 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -5,7 +5,7 @@ #include extern char early_command_line[COMMAND_LINE_SIZE]; -extern struct ipl_parameter_block early_ipl_block; -extern int early_ipl_block_valid; +extern struct ipl_parameter_block ipl_block; +extern int ipl_block_valid; #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a8389e2d2f034..6403cc9952bff 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -88,7 +88,6 @@ void __init save_area_add_regs(struct save_area *, void *regs); void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs); extern void s390_reset_system(void); -extern void ipl_store_parameters(void); extern size_t 
ipl_block_get_ascii_vmparm(char *dest, size_t size, const struct ipl_parameter_block *ipb); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index d6edf45f93b9a..c196abda36c77 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -309,7 +309,6 @@ void __init startup_init(void) setup_facility_list(); detect_machine_type(); setup_arch_string(); - ipl_store_parameters(); setup_boot_command_line(); detect_diag9c(); detect_diag44(); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 18a5d6317accd..d2e8dc60f2cf0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -119,11 +119,8 @@ static char *dump_type_str(enum dump_type type) } } -struct ipl_parameter_block __bootdata(early_ipl_block); -int __bootdata(early_ipl_block_valid); - -static int ipl_block_valid; -static struct ipl_parameter_block ipl_block; +int __bootdata_preserved(ipl_block_valid); +struct ipl_parameter_block __bootdata_preserved(ipl_block); static int reipl_capabilities = IPL_TYPE_UNKNOWN; @@ -1675,14 +1672,6 @@ void __init setup_ipl(void) atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); } -void __init ipl_store_parameters(void) -{ - if (early_ipl_block_valid) { - memcpy(&ipl_block, &early_ipl_block, sizeof(ipl_block)); - ipl_block_valid = 1; - } -} - void s390_reset_system(void) { /* Disable prefixing */ -- GitLab From fd184e1a8b650159ca553c53dd0fee75df799b4a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 1 Apr 2019 19:10:56 +0200 Subject: [PATCH 0912/1861] s390: introduce .boot.preserved.data section compile time validation Same as for .boot.data section make sure that .boot.preserved.data sections of vmlinux and arch/s390/compressed/vmlinux match before producing the compressed kernel image. Symbols presence, order and sizes are cross-checked. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index c844eaf24ed73..458d18c44c67d 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -30,7 +30,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o -targets := bzImage startup.a section_cmp.boot.data $(obj-y) +targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) subdir- := compressed OBJECTS := $(addprefix $(obj)/,$(obj-y)) @@ -48,7 +48,7 @@ define cmd_section_cmp touch $@ endef -$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data FORCE +$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE $(call if_changed,objcopy) $(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE -- GitLab From 5abb9351dfd937d43193f4d09af9c72bfe2c4180 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 1 Apr 2019 19:11:03 +0200 Subject: [PATCH 0913/1861] s390/uv: introduce guest side ultravisor code The Ultravisor Call Facility (stfle bit 158) defines an API to the Ultravisor (UV calls), a mini hypervisor located at machine level. With help of the Ultravisor, KVM will be able to run "protected" VMs, special VMs whose memory and management data are unavailable to KVM. The protected VMs can also request services from the Ultravisor. The guest api consists of UV calls to share and unshare memory with the kvm hypervisor. 
To enable support for this feature, the PROTECTED_VIRTUALIZATION_GUEST kconfig option has been introduced. Co-developed-by: Janosch Frank Signed-off-by: Janosch Frank Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 11 ++++ arch/s390/boot/Makefile | 1 + arch/s390/boot/startup.c | 2 + arch/s390/boot/uv.c | 24 +++++++ arch/s390/include/asm/uv.h | 132 +++++++++++++++++++++++++++++++++++++ arch/s390/kernel/setup.c | 5 ++ 6 files changed, 175 insertions(+) create mode 100644 arch/s390/boot/uv.c create mode 100644 arch/s390/include/asm/uv.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b6e3d0653002a..4ac8d49e206a8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -837,6 +837,17 @@ config HAVE_PNETID menu "Virtualization" +config PROTECTED_VIRTUALIZATION_GUEST + def_bool n + prompt "Protected virtualization guest support" + help + Select this option, if you want to be able to run this + kernel as a protected virtualization KVM guest. + Protected virtualization capable machines have a mini hypervisor + located at machine level (an ultravisor). With help of the + Ultravisor, KVM will be able to run "protected" VMs, special + VMs whose memory and management data are unavailable to KVM. + config PFAULT def_bool y prompt "Pseudo page fault support" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 458d18c44c67d..a5ae68b2aa844 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -30,6 +30,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o +obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 57d7f9446e29e..2bd4a62d436c1 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "compressed/decompressor.h" #include "boot.h" @@ -53,6 +54,7 @@ void startup_kernel(void) { void *img; + uv_query_info(); rescue_initrd(); sclp_early_read_info(); store_ipl_parmblock(); diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c new file mode 100644 index 0000000000000..ed007f4a6444f --- /dev/null +++ b/arch/s390/boot/uv.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +int __bootdata_preserved(prot_virt_guest); + +void uv_query_info(void) +{ + struct uv_cb_qui uvcb = { + .header.cmd = UVC_CMD_QUI, + .header.len = sizeof(uvcb) + }; + + if (!test_facility(158)) + return; + + if (uv_call(0, (uint64_t)&uvcb)) + return; + + if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && + test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) + prot_virt_guest = 1; +} diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h new file mode 100644 index 0000000000000..ef3c00b049ab4 --- /dev/null +++ b/arch/s390/include/asm/uv.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Ultravisor Interfaces + * + * Copyright IBM Corp. 
2019 + * + * Author(s): + * Vasily Gorbik + * Janosch Frank + */ +#ifndef _ASM_S390_UV_H +#define _ASM_S390_UV_H + +#include +#include +#include +#include + +#define UVC_RC_EXECUTED 0x0001 +#define UVC_RC_INV_CMD 0x0002 +#define UVC_RC_INV_STATE 0x0003 +#define UVC_RC_INV_LEN 0x0005 +#define UVC_RC_NO_RESUME 0x0007 + +#define UVC_CMD_QUI 0x0001 +#define UVC_CMD_SET_SHARED_ACCESS 0x1000 +#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 + +/* Bits in installed uv calls */ +enum uv_cmds_inst { + BIT_UVC_CMD_QUI = 0, + BIT_UVC_CMD_SET_SHARED_ACCESS = 8, + BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9, +}; + +struct uv_cb_header { + u16 len; + u16 cmd; /* Command Code */ + u16 rc; /* Response Code */ + u16 rrc; /* Return Reason Code */ +} __packed __aligned(8); + +struct uv_cb_qui { + struct uv_cb_header header; + u64 reserved08; + u64 inst_calls_list[4]; + u64 reserved30[15]; +} __packed __aligned(8); + +struct uv_cb_share { + struct uv_cb_header header; + u64 reserved08[3]; + u64 paddr; + u64 reserved28; +} __packed __aligned(8); + +static inline int uv_call(unsigned long r1, unsigned long r2) +{ + int cc; + + asm volatile( + "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n" + " brc 3,0b\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc) + : [r1] "a" (r1), [r2] "a" (r2) + : "memory", "cc"); + return cc; +} + +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST +extern int prot_virt_guest; + +static inline int is_prot_virt_guest(void) +{ + return prot_virt_guest; +} + +static inline int share(unsigned long addr, u16 cmd) +{ + struct uv_cb_share uvcb = { + .header.cmd = cmd, + .header.len = sizeof(uvcb), + .paddr = addr + }; + + if (!is_prot_virt_guest()) + return -ENOTSUPP; + /* + * Sharing is page wise, if we encounter addresses that are + * not page aligned, we assume something went wrong. If + * malloced structs are passed to this function, we could leak + * data to the hypervisor. + */ + BUG_ON(addr & ~PAGE_MASK); + + if (!uv_call(0, (u64)&uvcb)) + return 0; + return -EINVAL; +} + +/* + * Guest 2 request to the Ultravisor to make a page shared with the + * hypervisor for IO. + * + * @addr: Real or absolute address of the page to be shared + */ +static inline int uv_set_shared(unsigned long addr) +{ + return share(addr, UVC_CMD_SET_SHARED_ACCESS); +} + +/* + * Guest 2 request to the Ultravisor to make a page unshared. 
+ * + * @addr: Real or absolute address of the page to be unshared + */ +static inline int uv_remove_shared(unsigned long addr) +{ + return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); +} + +void uv_query_info(void); +#else +#define is_prot_virt_guest() 0 +static inline int uv_set_shared(unsigned long addr) { return 0; } +static inline int uv_remove_shared(unsigned long addr) { return 0; } +static inline void uv_query_info(void) {} +#endif + +#endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 2c642af526ce8..70197a68e6fab 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "entry.h" /* @@ -89,6 +90,10 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST +int __bootdata_preserved(prot_virt_guest); +#endif + int __bootdata(noexec_disabled); int __bootdata(memory_end_set); unsigned long __bootdata(memory_end); -- GitLab From db9492cef45efc347beed7b617dfdfac399f662b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 1 Apr 2019 19:11:04 +0200 Subject: [PATCH 0914/1861] s390/protvirt: add memory sharing for diag 308 set/store Add sharing of ipl parameter block for diag 308 set/store calls to allow kvm access in protected virtualization environment. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 3 +++ arch/s390/kernel/ipl.c | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index dc0438d08751f..385d0c7a96352 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "boot.h" char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; @@ -45,7 +46,9 @@ void store_ipl_parmblock(void) { int rc; + uv_set_shared(__pa(&ipl_block)); rc = __diag308(DIAG308_STORE, &ipl_block); + uv_remove_shared(__pa(&ipl_block)); if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) ipl_block_valid = 1; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d2e8dc60f2cf0..cffe3374d201d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -863,15 +864,21 @@ static void __reipl_run(void *unused) { switch (reipl_type) { case IPL_TYPE_CCW: + uv_set_shared(__pa(reipl_block_ccw)); diag308(DIAG308_SET, reipl_block_ccw); + uv_remove_shared(__pa(reipl_block_ccw)); diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_FCP: + uv_set_shared(__pa(reipl_block_fcp)); diag308(DIAG308_SET, reipl_block_fcp); + uv_remove_shared(__pa(reipl_block_fcp)); diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_NSS: + uv_set_shared(__pa(reipl_block_nss)); diag308(DIAG308_SET, reipl_block_nss); + uv_remove_shared(__pa(reipl_block_nss)); diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_UNKNOWN: @@ -1142,7 +1149,9 @@ static struct kset *dump_kset; static void diag308_dump(void *dump_block) { + uv_set_shared(__pa(dump_block)); diag308(DIAG308_SET, dump_block); + uv_remove_shared(__pa(dump_block)); while (1) { if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; -- GitLab From 093ddccb55157f909f203f9e50bce0c24431e791 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 1 Apr 2019 19:11:08 +0200 Subject: [PATCH 0915/1861] s390/protvirt: block kernel command line alteration Disallow kernel command line 
alteration via ipl parameter block when running in a protected virtualization environment. Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 385d0c7a96352..1900670a83fdc 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -144,7 +144,7 @@ void setup_boot_command_line(void) strcpy(early_command_line, strim(COMMAND_LINE)); /* append IPL PARM data to the boot command line */ - if (ipl_block_valid) + if (!is_prot_virt_guest() && ipl_block_valid) append_ipl_block_parm(); } -- GitLab From 34298422cc442d65e0ee271e47f2926f63fe91e8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Mar 2019 13:48:44 +0100 Subject: [PATCH 0916/1861] s390/qdio: fix output of DSCI value in debug file The DSCI is a 1-byte field, placed at the start of a u32. So when printing it to a queue's debug state, limit the output to the part that's actually occupied by the DSCI. When the DSCI is set, this gives us the expected output of '1', rather than the current (obscure) value of '16777216'. Suggested-by: Jens Remus Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index d2f98e5829d49..4d77d87538c9f 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -128,8 +128,8 @@ static int qstat_show(struct seq_file *m, void *v) seq_printf(m, "polling: %d ack start: %d ack count: %d\n", q->u.in.polling, q->u.in.ack_start, q->u.in.ack_count); - seq_printf(m, "DSCI: %d IRQs disabled: %u\n", - *(u32 *)q->irq_ptr->dsci, + seq_printf(m, "DSCI: %x IRQs disabled: %u\n", + *(u8 *)q->irq_ptr->dsci, test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state)); } -- GitLab From b39544c6e02fdf86c394a9367b10e707b800f4d7 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Mar 2019 10:39:25 +0100 Subject: [PATCH 0917/1861] s390/qdio: pass up count of ready-to-process SBALs When qdio_{in,out}bound_q_moved() scans a queue for pending work, it currently only returns a boolean to its caller. The interface to the upper-layer drivers (qdio_kick_handler() and qdio_get_next_buffers()) then re-calculates the number of pending SBALs from the q->first_to_check and q->first_to_kick cursors. Refactor this so that whenever get_{in,out}bound_buffer_frontier() adjusts the queue's first_to_check cursor, it also returns the corresponding count of ready-to-process SBALs (and 0 otherwise). A subsequent patch will then make use of this additional information.
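In sketch form (simplified, not the literal driver code), the interface change looks like this:

    /* before: the scan reports a boolean, and the caller re-derives the
     * count from the two cursors */
    moved = qdio_inbound_q_moved(q);                /* 0 or 1 */
    count = sub_buf(q->first_to_check, q->first_to_kick);

    /* after: the scan itself returns how many SBALs are ready */
    count = qdio_inbound_q_moved(q);                /* 0..QDIO_MAX_BUFFERS_MASK */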
Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 77 ++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index c25a40ecc1056..195f35256cc56 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -504,7 +504,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) */ count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); if (!count) - goto out; + return 0; /* * No siga sync here, as a PCI or we after a thin interrupt @@ -512,7 +512,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) */ count = get_buf_states(q, q->first_to_check, &state, count, 1, 0); if (!count) - goto out; + return 0; switch (state) { case SLSB_P_INPUT_PRIMED: @@ -522,7 +522,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) qperf_inc(q, inbound_queue_full); if (q->irq_ptr->perf_stat_enabled) account_sbals(q, count); - break; + return count; case SLSB_P_INPUT_ERROR: process_buffer_error(q, count); q->first_to_check = add_buf(q->first_to_check, count); @@ -530,7 +530,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) qperf_inc(q, inbound_queue_full); if (q->irq_ptr->perf_stat_enabled) account_sbals_error(q, count); - break; + return count; case SLSB_CU_INPUT_EMPTY: case SLSB_P_INPUT_NOT_INIT: case SLSB_P_INPUT_ACK: @@ -538,27 +538,26 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) q->q_stats.nr_sbal_nop++; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in nop:%1d %#02x", q->nr, q->first_to_check); - break; + return 0; default: WARN_ON_ONCE(1); + return 0; } -out: - return q->first_to_check; } static int qdio_inbound_q_moved(struct qdio_q *q) { - int bufnr; + int count; - bufnr = get_inbound_buffer_frontier(q); + count = get_inbound_buffer_frontier(q); - if (bufnr != q->last_move) { - q->last_move = bufnr; + if (count) { + q->last_move = q->first_to_check; if (!is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR) q->u.in.timestamp = get_tod_clock(); - return 1; - } else - return 0; + } + + return count; } static inline int qdio_inbound_q_done(struct qdio_q *q) @@ -678,9 +677,12 @@ static inline int qdio_tasklet_schedule(struct qdio_q *q) static void __qdio_inbound_processing(struct qdio_q *q) { + int count; + qperf_inc(q, tasklet_inbound); - if (!qdio_inbound_q_moved(q)) + count = qdio_inbound_q_moved(q); + if (count == 0) return; qdio_kick_handler(q); @@ -729,12 +731,12 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) */ count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK); if (!count) - goto out; + return 0; count = get_buf_states(q, q->first_to_check, &state, count, 0, q->u.out.use_cq); if (!count) - goto out; + return 0; switch (state) { case SLSB_P_OUTPUT_EMPTY: @@ -746,31 +748,28 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) q->first_to_check = add_buf(q->first_to_check, count); if (q->irq_ptr->perf_stat_enabled) account_sbals(q, count); - - break; + return count; case SLSB_P_OUTPUT_ERROR: process_buffer_error(q, count); q->first_to_check = add_buf(q->first_to_check, count); atomic_sub(count, &q->nr_buf_used); if (q->irq_ptr->perf_stat_enabled) account_sbals_error(q, count); - break; + return count; case SLSB_CU_OUTPUT_PRIMED: /* the adapter has not fetched the output yet */ if (q->irq_ptr->perf_stat_enabled) q->q_stats.nr_sbal_nop++; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out primed:%1d", q->nr); - break; + return 0; case 
SLSB_P_OUTPUT_NOT_INIT: case SLSB_P_OUTPUT_HALTED: - break; + return 0; default: WARN_ON_ONCE(1); + return 0; } - -out: - return q->first_to_check; } /* all buffers processed? */ @@ -781,16 +780,16 @@ static inline int qdio_outbound_q_done(struct qdio_q *q) static inline int qdio_outbound_q_moved(struct qdio_q *q) { - int bufnr; + int count; - bufnr = get_outbound_buffer_frontier(q); + count = get_outbound_buffer_frontier(q); - if (bufnr != q->last_move) { - q->last_move = bufnr; + if (count) { + q->last_move = q->first_to_check; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); - return 1; - } else - return 0; + } + + return count; } static int qdio_kick_outbound_q(struct qdio_q *q, unsigned long aob) @@ -837,10 +836,13 @@ retry: static void __qdio_outbound_processing(struct qdio_q *q) { + int count; + qperf_inc(q, tasklet_outbound); WARN_ON_ONCE(atomic_read(&q->nr_buf_used) < 0); - if (qdio_outbound_q_moved(q)) + count = qdio_outbound_q_moved(q); + if (count) qdio_kick_handler(q); if (queue_type(q) == QDIO_ZFCP_QFMT && !pci_out_supported(q->irq_ptr) && @@ -896,6 +898,8 @@ static inline void qdio_check_outbound_pci_queues(struct qdio_irq *irq) static void __tiqdio_inbound_processing(struct qdio_q *q) { + int count; + qperf_inc(q, tasklet_inbound); if (need_siga_sync(q) && need_siga_sync_after_ai(q)) qdio_sync_queues(q); @@ -903,7 +907,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) /* The interrupt could be caused by a PCI request: */ qdio_check_outbound_pci_queues(q->irq_ptr); - if (!qdio_inbound_q_moved(q)) + count = qdio_inbound_q_moved(q); + if (count == 0) return; qdio_kick_handler(q); @@ -1671,6 +1676,7 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, struct qdio_q *q; int start, end; struct qdio_irq *irq_ptr = cdev->private->qdio_data; + int count; if (!irq_ptr) return -ENODEV; @@ -1685,7 +1691,8 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, qdio_check_outbound_pci_queues(irq_ptr); - if (!qdio_inbound_q_moved(q)) + count = qdio_inbound_q_moved(q); + if (count == 0) return 0; /* Note: upper-layer MUST stop processing immediately here ... */ -- GitLab From 65e4f776385ac5cab021ad8d992e851375305906 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Mar 2019 10:43:46 +0100 Subject: [PATCH 0918/1861] s390/qdio: simplify SBAL range calculation When passing a range of ready-to-process SBALs to the upper-layer driver, use the available 'count' instead of calculating the distance between the first_to_check and first_to_kick cursors. This simplifies the logic of the queue-scan path, and opens up the possibility of scanning all 128 SBALs in one go (as determining the reported count no longer requires wrap-around safe arithmetic on the queue's cursors). 
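For reference, the wrap-around helpers in question are plain modulo-128 ring arithmetic, roughly:

    #define QDIO_MAX_BUFFERS_PER_Q 128
    #define QDIO_MAX_BUFFERS_MASK  (QDIO_MAX_BUFFERS_PER_Q - 1)

    static int add_buf(int bufnr, int inc)
    {
        return (bufnr + inc) & QDIO_MAX_BUFFERS_MASK;
    }

    static int sub_buf(int bufnr, int dec)
    {
        return (bufnr - dec) & QDIO_MAX_BUFFERS_MASK;
    }

With two's-complement ints the mask keeps both results in 0..127 even when the subtraction wraps below zero.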
Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 195f35256cc56..b6bc02efa0d02 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -636,17 +636,13 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, return phys_aob; } -static void qdio_kick_handler(struct qdio_q *q) +static void qdio_kick_handler(struct qdio_q *q, unsigned int count) { int start = q->first_to_kick; - int end = q->first_to_check; - int count; if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return; - count = sub_buf(end, start); - if (q->is_input_q) { qperf_inc(q, inbound_handler); DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "kih s:%02x c:%02x", start, count); @@ -662,7 +658,7 @@ static void qdio_kick_handler(struct qdio_q *q) q->irq_ptr->int_parm); /* for the next time */ - q->first_to_kick = end; + q->first_to_kick = add_buf(start, count); q->qdio_error = 0; } @@ -685,7 +681,7 @@ static void __qdio_inbound_processing(struct qdio_q *q) if (count == 0) return; - qdio_kick_handler(q); + qdio_kick_handler(q, count); if (!qdio_inbound_q_done(q)) { /* means poll time is not yet over */ @@ -843,7 +839,7 @@ static void __qdio_outbound_processing(struct qdio_q *q) count = qdio_outbound_q_moved(q); if (count) - qdio_kick_handler(q); + qdio_kick_handler(q, count); if (queue_type(q) == QDIO_ZFCP_QFMT && !pci_out_supported(q->irq_ptr) && !qdio_outbound_q_done(q)) @@ -911,7 +907,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) if (count == 0) return; - qdio_kick_handler(q); + qdio_kick_handler(q, count); if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched); @@ -1674,7 +1670,6 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, int *error) { struct qdio_q *q; - int start, end; struct qdio_irq *irq_ptr = cdev->private->qdio_data; int count; @@ -1699,15 +1694,14 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return -EIO; - start = q->first_to_kick; - end = q->first_to_check; - *bufnr = start; + *bufnr = q->first_to_kick; *error = q->qdio_error; /* for the next time */ - q->first_to_kick = end; + q->first_to_kick = add_buf(q->first_to_kick, count); q->qdio_error = 0; - return sub_buf(end, start); + + return count; } EXPORT_SYMBOL(qdio_get_next_buffers); -- GitLab From dccbbaff174df174bb30c21e05d7b732d013ea1a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Mar 2019 10:45:11 +0100 Subject: [PATCH 0919/1861] s390/qdio: eliminate queue's last_move cursor This cursor is used for debugging only. But since commit "s390/qdio: pass up count of ready-to-process SBALs" it effectively duplicates the first_to_check cursor, diverging for just a short moment when get_*_buffer_frontier() updates q->first_to_check. 
Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.h | 3 --- drivers/s390/cio/qdio_debug.c | 5 ++--- drivers/s390/cio/qdio_main.c | 11 +++-------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 2c29141005ca3..a069443998652 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -228,9 +228,6 @@ struct qdio_q { */ int first_to_check; - /* first_to_check of the last time */ - int last_move; - /* beginning position for calling the program */ int first_to_kick; diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index 4d77d87538c9f..35410e6eda2ea 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -121,9 +121,8 @@ static int qstat_show(struct seq_file *m, void *v) seq_printf(m, "Timestamp: %Lx Last AI: %Lx\n", q->timestamp, last_ai_time); - seq_printf(m, "nr_used: %d ftc: %d last_move: %d\n", - atomic_read(&q->nr_buf_used), - q->first_to_check, q->last_move); + seq_printf(m, "nr_used: %d ftc: %d\n", + atomic_read(&q->nr_buf_used), q->first_to_check); if (q->is_input_q) { seq_printf(m, "polling: %d ack start: %d ack count: %d\n", q->u.in.polling, q->u.in.ack_start, diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index b6bc02efa0d02..0ccd3b30af78e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -551,11 +551,8 @@ static int qdio_inbound_q_moved(struct qdio_q *q) count = get_inbound_buffer_frontier(q); - if (count) { - q->last_move = q->first_to_check; - if (!is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR) - q->u.in.timestamp = get_tod_clock(); - } + if (count && !is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR) + q->u.in.timestamp = get_tod_clock(); return count; } @@ -780,10 +777,8 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q) count = get_outbound_buffer_frontier(q); - if (count) { - q->last_move = q->first_to_check; + if (count) DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); - } return count; } -- GitLab From 81a8f2beb32a5951ecf04385301f50879abc092b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Sun, 7 Apr 2019 14:55:09 +0200 Subject: [PATCH 0920/1861] s390/mm: silence compiler warning when compiling without CONFIG_PGSTE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_PGSTE is not set (e.g. when compiling without KVM), GCC complains: CC arch/s390/mm/pgtable.o arch/s390/mm/pgtable.c:413:15: warning: ‘pmd_alloc_map’ defined but not used [-Wunused-function] static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) ^~~~~~~~~~~~~ Wrap the function with "#ifdef CONFIG_PGSTE" to silence the warning. 
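The general form of this kind of fix is to guard the definition with the same condition as all of its callers, e.g. (generic sketch, not the kernel code):

    #ifdef CONFIG_FOO
    /* only compiled when its CONFIG_FOO users are compiled too */
    static int helper(void)
    {
        return 42;
    }
    #endif

so that -Wunused-function never sees a definition without a user.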
Signed-off-by: Thomas Huth Reviewed-by: David Hildenbrand Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 8485d6dc27549..9ebd01219812c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -410,6 +410,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +#ifdef CONFIG_PGSTE static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -427,6 +428,7 @@ static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) pmd = pmd_alloc(mm, pud, addr); return pmd; } +#endif pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) -- GitLab From e91012ee855ad9f5ef2ab106a3de51db93fe4d0c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:20 +0200 Subject: [PATCH 0921/1861] s390: cio: fix cio_irb declaration clang points out that the declaration of cio_irb does not match the definition exactly, it is missing the alignment attribute: ../drivers/s390/cio/cio.c:50:1: warning: section does not match previous declaration [-Wsection] DEFINE_PER_CPU_ALIGNED(struct irb, cio_irb); ^ ../include/linux/percpu-defs.h:150:2: note: expanded from macro 'DEFINE_PER_CPU_ALIGNED' DEFINE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \ ^ ../include/linux/percpu-defs.h:93:9: note: expanded from macro 'DEFINE_PER_CPU_SECTION' extern __PCPU_ATTRS(sec) __typeof__(type) name; \ ^ ../include/linux/percpu-defs.h:49:26: note: expanded from macro '__PCPU_ATTRS' __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ ^ ../drivers/s390/cio/cio.h:118:1: note: previous attribute is here DECLARE_PER_CPU(struct irb, cio_irb); ^ ../include/linux/percpu-defs.h:111:2: note: expanded from macro 'DECLARE_PER_CPU' DECLARE_PER_CPU_SECTION(type, name, "") ^ ../include/linux/percpu-defs.h:87:9: note: expanded from macro 'DECLARE_PER_CPU_SECTION' extern __PCPU_ATTRS(sec) __typeof__(type) name ^ ../include/linux/percpu-defs.h:49:26: note: expanded from macro '__PCPU_ATTRS' __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ ^ Use DECLARE_PER_CPU_ALIGNED() here, to make the two match. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 9811fd8a0c731..92eabbb5f18d4 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -115,7 +115,7 @@ struct subchannel { struct schib_config config; } __attribute__ ((aligned(8))); -DECLARE_PER_CPU(struct irb, cio_irb); +DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb); #define to_subchannel(n) container_of(n, struct subchannel, dev) -- GitLab From e24e4712efad737ca09ff299276737331cd021d9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Apr 2019 12:28:41 +0200 Subject: [PATCH 0922/1861] s390/rseq: use trap4 for RSEQ_SIG Use trap4 as the guard instruction for the restartable sequence abort handler. 
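The constant doubles as a real instruction on purpose: if control ever reaches the signature bytes, they execute as trap4 and raise a special-operation exception rather than silently doing something else. A small self-contained check of the byte layout (illustrative; the kernel compares the 32-bit word immediately preceding the abort handler against the registered signature):

  #include <assert.h>

  int main(void)
  {
  	/* "trap4 4095(%r0)" assembles to the bytes b2 ff 0f ff */
  	const unsigned char trap4[4] = { 0xb2, 0xff, 0x0f, 0xff };
  	unsigned int sig = 0;
  	int i;

  	/* s390 is big-endian, so read the bytes most-significant first */
  	for (i = 0; i < 4; i++)
  		sig = (sig << 8) | trap4[i];
  	assert(sig == 0xB2FF0FFF);
  	return 0;
  }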
Signed-off-by: Martin Schwidefsky --- tools/testing/selftests/rseq/rseq-s390.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 1069e85258ce3..0afdf79579745 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -1,6 +1,13 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -#define RSEQ_SIG 0x53053053 +/* + * RSEQ_SIG uses the trap4 instruction. As Linux does not make use of the + * access-register mode nor the linkage stack this instruction will always + * cause a special-operation exception (the trap-enabled bit in the DUCT + * is and will stay 0). The instruction pattern is + * b2 ff 0f ff trap4 4095(%r0) + */ +#define RSEQ_SIG 0xB2FF0FFF #define rseq_smp_mb() __asm__ __volatile__ ("bcr 15,0" ::: "memory") #define rseq_smp_rmb() rseq_smp_mb()
-- GitLab
From 1b8f21b74c3c9c82fce5a751d7aefb7cc0b8d33d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Apr 2019 06:31:21 +0800 Subject: [PATCH 0923/1861] blk-mq: introduce blk_mq_complete_request_sync() NVMe's error handler follows the typical steps of tearing down the hardware to recover the controller: 1) stop blk_mq hw queues 2) stop the real hw queues 3) cancel in-flight requests via blk_mq_tagset_busy_iter(tags, cancel_request, ...) cancel_request(): mark the request as abort blk_mq_complete_request(req); 4) destroy real hw queues However, there may be a race between #3 and #4, because blk_mq_complete_request() may run q->mq_ops->complete(rq) remotely and asynchronously, and ->complete(rq) may be run after #4. This patch introduces blk_mq_complete_request_sync() for fixing the above race. Cc: Sagi Grimberg Cc: Bart Van Assche Cc: James Smart Cc: linux-nvme@lists.infradead.org Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 +++++++ include/linux/blk-mq.h | 1 + 2 files changed, 8 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c index a9354835cf517..9516304a38ee3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -654,6 +654,13 @@ bool blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); +void blk_mq_complete_request_sync(struct request *rq) +{ + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + rq->q->mq_ops->complete(rq); +} +EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync); + int blk_mq_request_started(struct request *rq) { return blk_mq_rq_state(rq) != MQ_RQ_IDLE; }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cb2aa7ecafff5..db29928de4674 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -302,6 +302,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); bool blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request_sync(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q);
-- GitLab
From eb3afb75b57c28599af0dfa03a99579d410749e9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Apr 2019 06:31:22 +0800 Subject: [PATCH 0924/1861] nvme: cancel request synchronously nvme_cancel_request() is used in the error handler, and it is always reliable to cancel the request synchronously, which avoids a possible race in which the request may be completed after the real hw
queue is destroyed. One issue is reported by our customer on NVMe RDMA, in which freed ib queue pair may be used in nvme_rdma_complete_rq(). Cc: Sagi Grimberg Cc: Bart Van Assche Cc: James Smart Cc: linux-nvme@lists.infradead.org Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 470601980794e..2c43e12b70afc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -288,7 +288,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) "Cancelling I/O %d", req->tag); nvme_req(req)->status = NVME_SC_ABORT_REQ; - blk_mq_complete_request(req); + blk_mq_complete_request_sync(req); return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); -- GitLab From 20eea462bf2fbff3a4be375cc8424a544235a432 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Mon, 25 Mar 2019 16:56:41 +0530 Subject: [PATCH 0925/1861] drm/i915/icl: Ungate ddi clocks before IO enable IO enable sequencing needs ddi clocks enabled. These clocks will be gated at a later point in the enable sequence. v2: Fix the commit header (Uma) v3: Remove the redundant read (Ville) Fixes: 949fc52af19e ("drm/i915/icl: add pll mapping for DSI") Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1553513202-13863-1-git-send-email-vandita.kulkarni@intel.com (cherry picked from commit c5b81a325263a891d5811aabe938c87e03db4c37) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/icl_dsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 83cd8284e8077..43b7b80ffa212 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -598,6 +598,12 @@ static void gen11_dsi_map_pll(struct intel_encoder *encoder, val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); } I915_WRITE(DPCLKA_CFGCR0_ICL, val); + + for_each_dsi_port(port, intel_dsi->ports) { + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + } + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + POSTING_READ(DPCLKA_CFGCR0_ICL); mutex_unlock(&dev_priv->dpll_lock); -- GitLab From 4690985e00ac38334d4a68f99c56ac310ef0527b Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Mon, 25 Mar 2019 16:56:42 +0530 Subject: [PATCH 0926/1861] drm/i915/icl: Fix port disable sequence for mipi-dsi Re-enable clock gating of DDI clocks. 
v2: Fix the default ddi clk state for mipi-dsi (Imre) Fixes: 1026bea00381 ("drm/i915/icl: Ungate DSI clocks") Signed-off-by: Vandita Kulkarni Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1553513202-13863-2-git-send-email-vandita.kulkarni@intel.com (cherry picked from commit 942d1cf48eae3fcd7e973cfb708d5c4860f0c713) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/icl_dsi.c | 2 +- drivers/gpu/drm/i915/intel_ddi.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 43b7b80ffa212..641e0778fa9c4 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -1132,7 +1132,7 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) DRM_ERROR("DDI port:%c buffer not idle\n", port_name(port)); } - gen11_dsi_ungate_clocks(encoder); + gen11_dsi_gate_clocks(encoder); } static void gen11_dsi_disable_io_power(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index deba8e90360a6..ab4e60dfd6a34 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2824,10 +2824,10 @@ void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) return; } /* - * DSI ports should have their DDI clock ungated when disabled - * and gated when enabled. + * For DSI we keep the ddi clocks gated + * except during enable/disable sequence. */ - ddi_clk_needed = !encoder->base.crtc; + ddi_clk_needed = false; } val = I915_READ(DPCLKA_CFGCR0_ICL);
-- GitLab
From 21635d7311734d2d1b177f8a95e2f9386174b76d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 5 Apr 2019 10:52:20 +0300 Subject: [PATCH 0927/1861] drm/i915/dp: revert back to max link rate and lane count on eDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7769db588384 ("drm/i915/dp: optimize eDP 1.4+ link config fast and narrow") started to optimize the eDP 1.4+ link config, both per spec and as preparation for display stream compression support. Sadly, we again face panels that flat out fail with parameters they claim to support. Revert, and go back to the drawing board. v2: Actually revert to max params instead of just wide-and-slow.
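What the optimization traded off is easiest to see from the link budget arithmetic behind the config search; a rough sketch with illustrative helpers (not the driver's exact functions):

  /* DP link rates are raw per-lane bit rates; 8b/10b channel coding
   * leaves 8 payload bits for every 10 transmitted. */
  static long dp_link_avail_kbps(long link_rate_kbps, int lane_count)
  {
  	return link_rate_kbps * lane_count * 8 / 10;
  }

  /* bandwidth a mode needs: pixel clock times bits per pixel */
  static long dp_mode_rate_kbps(long pixel_clock_khz, int bpp)
  {
  	return pixel_clock_khz * bpp;
  }

For example, a 4k60 mode at 24bpp needs about 533250 * 24 = 12798000 kbps; HBR2 x4 offers 5400000 * 4 * 8 / 10 = 17280000 kbps, but so do several narrower or slower configurations, and choosing among those is exactly what some eDP panels turned out not to tolerate.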
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109959 Fixes: 7769db588384 ("drm/i915/dp: optimize eDP 1.4+ link config fast and narrow") Cc: Ville Syrjälä Cc: Manasi Navare Cc: Rodrigo Vivi Cc: Matt Atwood Cc: "Lee, Shawn C" Cc: Dave Airlie Cc: intel-gfx@lists.freedesktop.org Cc: # v5.0+ Reviewed-by: Rodrigo Vivi Reviewed-by: Manasi Navare Tested-by: Albert Astals Cid # v5.0 backport Tested-by: Emanuele Panigati # v5.0 backport Tested-by: Matteo Iervasi # v5.0 backport Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190405075220.9815-1-jani.nikula@intel.com (cherry picked from commit f11cb1c19ad0563b3c1ea5eb16a6bac0e401f428) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 69 +++++---------------------------- 1 file changed, 10 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index cf709835fb9a9..8891f29a8c7ff 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1859,42 +1859,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, return -EINVAL; } -/* Optimize link config in order: max bpp, min lanes, min clock */ -static int -intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config, - const struct link_config_limits *limits) -{ - struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - int bpp, clock, lane_count; - int mode_rate, link_clock, link_avail; - - for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); - - for (lane_count = limits->min_lane_count; - lane_count <= limits->max_lane_count; - lane_count <<= 1) { - for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = link_clock; - - return 0; - } - } - } - } - - return -EINVAL; -} - static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) { int i, num_bpc; @@ -2031,15 +1995,13 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, limits.min_bpp = 6 * 3; limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (intel_dp_is_edp(intel_dp) && intel_dp->edp_dpcd[0] < DP_EDP_14) { + if (intel_dp_is_edp(intel_dp)) { /* * Use the maximum clock and number of lanes the eDP panel - * advertizes being capable of. The eDP 1.3 and earlier panels - * are generally designed to support only a single clock and - * lane configuration, and typically these values correspond to - * the native resolution of the panel. With eDP 1.4 rate select - * and DSC, this is decreasingly the case, and we need to be - * able to select less than maximum link config. + * advertizes being capable of. The panels are generally + * designed to support only a single clock and lane + * configuration, and typically these values correspond to the + * native resolution of the panel. */ limits.min_lane_count = limits.max_lane_count; limits.min_clock = limits.max_clock; @@ -2053,22 +2015,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, intel_dp->common_rates[limits.max_clock], limits.max_bpp, adjusted_mode->crtc_clock); - if (intel_dp_is_edp(intel_dp)) - /* - * Optimize for fast and narrow. 
eDP 1.3 section 3.3 and eDP 1.4 - * section A.1: "It is recommended that the minimum number of - * lanes be used, using the minimum link rate allowed for that - * lane configuration." - * - * Note that we use the max clock and lane count for eDP 1.3 and - * earlier, and fast vs. wide is irrelevant. - */ - ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, - &limits); - else - /* Optimize for slow and wide. */ - ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, - &limits); + /* + * Optimize for slow and wide. This is the place to add alternative + * optimization policy. + */ + ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); /* enable compression if the mode doesn't fit available BW */ DRM_DEBUG_KMS("Force DSC en = %d\n", intel_dp->force_dsc_en); -- GitLab From ac71317e6be01812cc0c54d8be6d3c1139c8380b Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Wed, 10 Apr 2019 16:23:38 +0200 Subject: [PATCH 0928/1861] ASoC: wcd9335: Fix missing regmap requirement wcd9335.c: undefined reference to 'devm_regmap_add_irq_chip' Signed-off-by: Marc Gonzalez Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 419114edfd57d..667fc1d59e189 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1151,6 +1151,7 @@ config SND_SOC_WCD9335 tristate "WCD9335 Codec" depends on SLIMBUS select REGMAP_SLIMBUS + select REGMAP_IRQ help The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports Qualcomm Technologies, Inc. (QTI) multimedia solutions, -- GitLab From d263119387de9975d2acba1dfd3392f7c5979c18 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 1 Apr 2019 12:30:14 +0100 Subject: [PATCH 0929/1861] arm64: compat: Reduce address limit Currently, compat tasks running on arm64 can allocate memory up to TASK_SIZE_32 (UL(0x100000000)). This means that mmap() allocations, if we treat them as returning an array, are not compliant with the sections 6.5.8 of the C standard (C99) which states that: "If the expression P points to an element of an array object and the expression Q points to the last element of the same array object, the pointer expression Q+1 compares greater than P". Redefine TASK_SIZE_32 to address the issue. Cc: Catalin Marinas Cc: Will Deacon Cc: Jann Horn Cc: Reported-by: Jann Horn Signed-off-by: Vincenzo Frascino [will: fixed typo in comment] Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5d9ce62bdebde..228af56ece480 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -57,7 +57,15 @@ #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT +#ifdef CONFIG_ARM64_64K_PAGES +/* + * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied + * by the compat vectors page. + */ #define TASK_SIZE_32 UL(0x100000000) +#else +#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE) +#endif /* CONFIG_ARM64_64K_PAGES */ #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? 
\ -- GitLab From f05badde4e20d2e0f8c39d07a6873b2bfb0754f8 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 5 Apr 2019 05:49:34 +0000 Subject: [PATCH 0930/1861] RISC-V: Fix Maximum Physical Memory 2GiB option for 64bit systems The Maximum Physical Memory 2GiB option for 64bit systems is currently broken because kernel hangs at boot-time when this option is enabled and the underlying system has more than 2GiB memory. This issue can be easily reproduced on SiFive Unleashed board where we have 8GiB of memory. This patch fixes above issue by removing unusable memory region in setup_bootmem(). Signed-off-by: Anup Patel Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 5fd8c922e1c22..bc7b77e34d092 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -121,6 +121,14 @@ void __init setup_bootmem(void) */ memblock_reserve(reg->base, vmlinux_end - reg->base); mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); + + /* + * Remove memblock from the end of usable area to the + * end of region + */ + if (reg->base + mem_size < end) + memblock_remove(reg->base + mem_size, + end - reg->base - mem_size); } } BUG_ON(mem_size == 0); -- GitLab From 46c874419652bbefdfed17420fd6e88d8a31d9ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Apr 2019 14:03:45 -0400 Subject: [PATCH 0931/1861] securityfs: fix use-after-free on symlink traversal symlink body shouldn't be freed without an RCU delay. Switch securityfs to ->destroy_inode() and use of call_rcu(); free both the inode and symlink body in the callback. Signed-off-by: Al Viro --- security/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/security/inode.c b/security/inode.c index b7772a9b315ee..421dd72b58767 100644 --- a/security/inode.c +++ b/security/inode.c @@ -27,17 +27,22 @@ static struct vfsmount *mount; static int mount_count; -static void securityfs_evict_inode(struct inode *inode) +static void securityfs_i_callback(struct rcu_head *head) { - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); + struct inode *inode = container_of(head, struct inode, i_rcu); if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); + free_inode_nonrcu(inode); +} + +static void securityfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, securityfs_i_callback); } static const struct super_operations securityfs_super_operations = { .statfs = simple_statfs, - .evict_inode = securityfs_evict_inode, + .destroy_inode = securityfs_destroy_inode, }; static int fill_super(struct super_block *sb, void *data, int silent) -- GitLab From f51dcd0f621caac5380ce90fbbeafc32ce4517ae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Apr 2019 14:04:34 -0400 Subject: [PATCH 0932/1861] apparmorfs: fix use-after-free on symlink traversal symlink body shouldn't be freed without an RCU delay. Switch apparmorfs to ->destroy_inode() and use of call_rcu(); free both the inode and symlink body in the callback. 
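As with securityfs above, the RCU delay matters because pathwalk in RCU-walk mode may dereference the symlink body with no locks or references held; a sketch of the reader side (not the exact VFS code):

  /* Called with only rcu_read_lock() held, no inode reference taken.
   * Freeing ->i_link directly in eviction could pull the string out
   * from under this dereference; deferring the kfree() to an RCU
   * callback in ->destroy_inode() cannot. */
  static const char *example_get_link_rcu(struct inode *inode)
  {
  	return READ_ONCE(inode->i_link);
  }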
Signed-off-by: Al Viro --- security/apparmor/apparmorfs.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index fefee040bf791..b9298d2e81654 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -123,17 +123,22 @@ static int aafs_show_path(struct seq_file *seq, struct dentry *dentry) return 0; } -static void aafs_evict_inode(struct inode *inode) +static void aafs_i_callback(struct rcu_head *head) { - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); + struct inode *inode = container_of(head, struct inode, i_rcu); if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); + free_inode_nonrcu(inode); +} + +static void aafs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, aafs_i_callback); } static const struct super_operations aafs_super_ops = { .statfs = simple_statfs, - .evict_inode = aafs_evict_inode, + .destroy_inode = aafs_destroy_inode, .show_path = aafs_show_path, }; -- GitLab From d737b25b1ae0540ba13cbd45ebb9b58a1d6d7f0d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 10 Apr 2019 06:27:05 -0700 Subject: [PATCH 0933/1861] IB/hfi1: Do not flush send queue in the TID RDMA second leg When a QP is put into error state, the send queue will be flushed. This mechanism is implemented in both the first and the second leg of the send engine. Since the second leg is only responsible for data transactions in the KDETH space for the TID RDMA WRITE request, it should not perform the flushing of the send queue. This patch removes the flushing function of the second leg, but still keeps the bailing out of the QP if it is put into error state. Fixes: 70dcb2e3dc6a ("IB/hfi1: Add the TID second leg send packet builder") Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 31 +++++++-------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index fdda33aca77f2..43cbce7a19ea4 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -5017,24 +5017,14 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) make_tid_rdma_ack(qp, ohdr, ps)) return 1; - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { - if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) - goto bail; - /* We are in the error state, flush the work request. */ - if (qp->s_last == READ_ONCE(qp->s_head)) - goto bail; - /* If DMAs are in progress, we can't flush immediately. */ - if (iowait_sdma_pending(&priv->s_iowait)) { - qp->s_flags |= RVT_S_WAIT_DMA; - goto bail; - } - clear_ahg(qp); - wqe = rvt_get_swqe_ptr(qp, qp->s_last); - hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); - /* will get called again */ - goto done_free_tx; - } + /* + * Bail out if we can't send data. + * Be reminded that this check must been done after the call to + * make_tid_rdma_ack() because the responding QP could be in + * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA. 
+ */ + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) + goto bail; if (priv->s_flags & RVT_S_WAIT_ACK) goto bail; @@ -5144,11 +5134,6 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2, middle, ps); return 1; -done_free_tx: - hfi1_put_txreq(ps->s_txreq); - ps->s_txreq = NULL; - return 1; - bail: hfi1_put_txreq(ps->s_txreq); bail_no_tx: -- GitLab From b2b3a70cd9984fe39ed5aaa9ce596476051ce5de Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Wed, 10 Apr 2019 19:56:43 +0200 Subject: [PATCH 0934/1861] lightnvm: pblk: fix crash in pblk_end_partial_read due to multipage bvecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The introduction of multipage bio vectors broke pblk's partial read logic due to it not being prepared for multipage bio vectors. Use bio vector iterators instead of direct bio vector indexing. Fixes: 07173c3ec276 ("block: enable multipage bvecs") Reported-by: Klaus Jensen Signed-off-by: Hans Holmberg Updated description. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 50 ++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 3789185144dae..0b7d5fb4548dc 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -231,14 +231,14 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) struct pblk_sec_meta *meta; struct bio *new_bio = rqd->bio; struct bio *bio = pr_ctx->orig_bio; - struct bio_vec src_bv, dst_bv; void *meta_list = rqd->meta_list; - int bio_init_idx = pr_ctx->bio_init_idx; unsigned long *read_bitmap = pr_ctx->bitmap; + struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT; + struct bvec_iter new_iter = BVEC_ITER_ALL_INIT; int nr_secs = pr_ctx->orig_nr_secs; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); void *src_p, *dst_p; - int hole, i; + int bit, i; if (unlikely(nr_holes == 1)) { struct ppa_addr ppa; @@ -257,33 +257,39 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) /* Fill the holes in the original bio */ i = 0; - hole = find_first_zero_bit(read_bitmap, nr_secs); - do { - struct pblk_line *line; + for (bit = 0; bit < nr_secs; bit++) { + if (!test_bit(bit, read_bitmap)) { + struct bio_vec dst_bv, src_bv; + struct pblk_line *line; - line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); - kref_put(&line->ref, pblk_line_put); + line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); + kref_put(&line->ref, pblk_line_put); - meta = pblk_get_meta(pblk, meta_list, hole); - meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); + meta = pblk_get_meta(pblk, meta_list, bit); + meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); - src_bv = new_bio->bi_io_vec[i++]; - dst_bv = bio->bi_io_vec[bio_init_idx + hole]; + dst_bv = bio_iter_iovec(bio, orig_iter); + src_bv = bio_iter_iovec(new_bio, new_iter); - src_p = kmap_atomic(src_bv.bv_page); - dst_p = kmap_atomic(dst_bv.bv_page); + src_p = kmap_atomic(src_bv.bv_page); + dst_p = kmap_atomic(dst_bv.bv_page); - memcpy(dst_p + dst_bv.bv_offset, - src_p + src_bv.bv_offset, - PBLK_EXPOSED_PAGE_SIZE); + memcpy(dst_p + dst_bv.bv_offset, + src_p + src_bv.bv_offset, + PBLK_EXPOSED_PAGE_SIZE); - kunmap_atomic(src_p); - kunmap_atomic(dst_p); + kunmap_atomic(src_p); + kunmap_atomic(dst_p); - mempool_free(src_bv.bv_page, &pblk->page_bio_pool); + flush_dcache_page(dst_bv.bv_page); + mempool_free(src_bv.bv_page, 
&pblk->page_bio_pool); - hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); - } while (hole < nr_secs); + bio_advance_iter(new_bio, &new_iter, + PBLK_EXPOSED_PAGE_SIZE); + i++; + } + bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE); + } bio_put(new_bio); kfree(pr_ctx); -- GitLab From b9c8172eedc1a8285c373564174a724311394472 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 4 Apr 2019 09:36:35 -0500 Subject: [PATCH 0935/1861] Documentation: dt: edac: Fix Stratix10 IRQ bindings Fix Stratix10 ECC bindings to specify only the single bit error. On Stratix10 double bit errors are handled as SErrors instead of interrupts. Indicate the differences between the ARM64 and ARM32 EDAC architecture in the bindings. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Acked-by: Rob Herring Cc: James Morse Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: dinguyen@kernel.org Cc: linux-edac Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1554388597-28019-2-git-send-email-thor.thayer@linux.intel.com --- .../bindings/edac/socfpga-eccmgr.txt | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt index 5626560a6cfdf..acb211c098c02 100644 --- a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt @@ -232,37 +232,46 @@ Example: }; }; -Stratix10 SoCFPGA ECC Manager +Stratix10 SoCFPGA ECC Manager (ARM64) The Stratix10 SoC ECC Manager handles the IRQs for each peripheral -in a shared register similar to the Arria10. However, ECC requires -access to registers that can only be read from Secure Monitor with -SMC calls. Therefore the device tree is slightly different. +in a shared register similar to the Arria10. However, Stratix10 ECC +requires access to registers that can only be read from Secure Monitor +with SMC calls. Therefore the device tree is slightly different. Note +that only 1 interrupt is sent in Stratix10 because the double bit errors +are treated as SErrors in ARM64 instead of IRQs in ARM32. Required Properties: - compatible : Should be "altr,socfpga-s10-ecc-manager" -- interrupts : Should be single bit error interrupt, then double bit error - interrupt. +- altr,sysgr-syscon : phandle to Stratix10 System Manager Block + containing the ECC manager registers. +- interrupts : Should be single bit error interrupt. - interrupt-controller : boolean indicator that ECC Manager is an interrupt controller - #interrupt-cells : must be set to 2. +- #address-cells: must be 1 +- #size-cells: must be 1 +- ranges : standard definition, should translate from local addresses Subcomponents: SDRAM ECC Required Properties: - compatible : Should be "altr,sdram-edac-s10" -- interrupts : Should be single bit error interrupt, then double bit error - interrupt, in this order. +- interrupts : Should be single bit error interrupt. 
Example: eccmgr { compatible = "altr,socfpga-s10-ecc-manager"; - interrupts = <0 15 4>, <0 95 4>; + altr,sysmgr-syscon = <&sysmgr>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = <0 15 4>; interrupt-controller; #interrupt-cells = <2>; + ranges; sdramedac { compatible = "altr,sdram-edac-s10"; - interrupts = <16 4>, <48 4>; + interrupts = <16 IRQ_TYPE_LEVEL_HIGH>; }; }; -- GitLab From d4d0e40977ac450f32f2db5e4d8e23c9d2578899 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:12 +0000 Subject: [PATCH 0936/1861] mlxsw: spectrum_switchdev: Add MDB entries in prepare phase The driver cannot guarantee in the prepare phase that it will be able to write an MDB entry to the device. In case the driver returned success during the prepare phase, but then failed to add the entry in the commit phase, a WARNING [1] will be generated by the switchdev core. Fix this by doing the work in the prepare phase instead. [1] [ 358.544486] swp12s0: Commit of object (id=2) failed. [ 358.550061] WARNING: CPU: 0 PID: 30 at net/switchdev/switchdev.c:281 switchdev_port_obj_add_now+0x9b/0xe0 [ 358.560754] CPU: 0 PID: 30 Comm: kworker/0:1 Not tainted 5.0.0-custom-13382-gf2449babf221 #1350 [ 358.570472] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 358.580582] Workqueue: events switchdev_deferred_process_work [ 358.587001] RIP: 0010:switchdev_port_obj_add_now+0x9b/0xe0 ... [ 358.614109] RSP: 0018:ffffa6b900d6fe18 EFLAGS: 00010286 [ 358.619943] RAX: 0000000000000000 RBX: ffff8b00797ff000 RCX: 0000000000000000 [ 358.627912] RDX: ffff8b00b7a1d4c0 RSI: ffff8b00b7a152e8 RDI: ffff8b00b7a152e8 [ 358.635881] RBP: ffff8b005c3f5bc0 R08: 000000000000022b R09: 0000000000000000 [ 358.643850] R10: 0000000000000000 R11: ffffa6b900d6fcc8 R12: 0000000000000000 [ 358.651819] R13: dead000000000100 R14: ffff8b00b65a23c0 R15: 0ffff8b00b7a2200 [ 358.659790] FS: 0000000000000000(0000) GS:ffff8b00b7a00000(0000) knlGS:0000000000000000 [ 358.668820] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 358.675228] CR2: 00007f00aad90de0 CR3: 00000001ca80d000 CR4: 00000000001006f0 [ 358.683188] Call Trace: [ 358.685918] switchdev_port_obj_add_deferred+0x13/0x60 [ 358.691655] switchdev_deferred_process+0x6b/0xf0 [ 358.696907] switchdev_deferred_process_work+0xa/0x10 [ 358.702548] process_one_work+0x1f5/0x3f0 [ 358.707022] worker_thread+0x28/0x3c0 [ 358.711099] ? process_one_work+0x3f0/0x3f0 [ 358.715768] kthread+0x10d/0x130 [ 358.719369] ? __kthread_create_on_node+0x180/0x180 [ 358.724815] ret_from_fork+0x35/0x40 Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Reported-by: Alex Kushnarov Tested-by: Alex Kushnarov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f6ce386c30367..50111f228d772 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1630,7 +1630,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); -- GitLab From a8c133b06183c529c51cd0d54eb57d6b7078370c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:13 +0000 Subject: [PATCH 0937/1861] mlxsw: core: Do not use WQ_MEM_RECLAIM for EMAD workqueue The EMAD workqueue is used to handle retransmission of EMAD packets that contain configuration data for the device's firmware. Given the workers need to allocate these packets and that the code is not called as part of memory reclaim path, remove the WQ_MEM_RECLAIM flag. Fixes: d965465b60ba ("mlxsw: core: Fix possible deadlock") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d23d53c0e2842..91cd6fa42e9a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; - emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq; -- GitLab From 4af0699782e2cc7d0d89db9eb6f8844dd3df82dc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:14 +0000 Subject: [PATCH 0938/1861] mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw ordered workqueue The ordered workqueue is used to offload various objects such as routes and neighbours in the order they are notified. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. This can also result in a warning [1], if a worker tries to flush a non-WQ_MEM_RECLAIM workqueue. [1] [97703.542861] workqueue: WQ_MEM_RECLAIM mlxsw_core_ordered:mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] is flushing !WQ_MEM_RECLAIM events:rht_deferred_worker [97703.542884] WARNING: CPU: 1 PID: 32492 at kernel/workqueue.c:2605 check_flush_dependency+0xb5/0x130 ... [97703.542988] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 [97703.543049] Workqueue: mlxsw_core_ordered mlxsw_sp_router_fib6_event_work [mlxsw_spectrum] [97703.543061] RIP: 0010:check_flush_dependency+0xb5/0x130 ... 
[97703.543071] RSP: 0018:ffffb3f08137bc00 EFLAGS: 00010086 [97703.543076] RAX: 0000000000000000 RBX: ffff96e07740ae00 RCX: 0000000000000000 [97703.543080] RDX: 0000000000000094 RSI: ffffffff82dc1934 RDI: 0000000000000046 [97703.543084] RBP: ffffb3f08137bc20 R08: ffffffff82dc18a0 R09: 00000000000225c0 [97703.543087] R10: 0000000000000000 R11: 0000000000007eec R12: ffffffff816e4ee0 [97703.543091] R13: ffff96e06f6a5c00 R14: ffff96e077ba7700 R15: ffffffff812ab0c0 [97703.543097] FS: 0000000000000000(0000) GS:ffff96e077a80000(0000) knlGS:0000000000000000 [97703.543101] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [97703.543104] CR2: 00007f8cd135b280 CR3: 00000001e860e003 CR4: 00000000003606e0 [97703.543109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [97703.543112] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [97703.543115] Call Trace: [97703.543129] __flush_work+0xbd/0x1e0 [97703.543137] ? __cancel_work_timer+0x136/0x1b0 [97703.543145] ? pwq_dec_nr_in_flight+0x49/0xa0 [97703.543154] __cancel_work_timer+0x136/0x1b0 [97703.543175] ? mlxsw_reg_trans_bulk_wait+0x145/0x400 [mlxsw_core] [97703.543184] cancel_work_sync+0x10/0x20 [97703.543191] rhashtable_free_and_destroy+0x23/0x140 [97703.543198] rhashtable_destroy+0xd/0x10 [97703.543254] mlxsw_sp_fib_destroy+0xb1/0xf0 [mlxsw_spectrum] [97703.543310] mlxsw_sp_vr_put+0xa8/0xc0 [mlxsw_spectrum] [97703.543364] mlxsw_sp_fib_node_put+0xbf/0x140 [mlxsw_spectrum] [97703.543418] ? mlxsw_sp_fib6_entry_destroy+0xe8/0x110 [mlxsw_spectrum] [97703.543475] mlxsw_sp_router_fib6_event_work+0x6cd/0x7f0 [mlxsw_spectrum] [97703.543484] process_one_work+0x1fd/0x400 [97703.543493] worker_thread+0x34/0x410 [97703.543500] kthread+0x121/0x140 [97703.543507] ? process_one_work+0x400/0x400 [97703.543512] ? kthread_park+0x90/0x90 [97703.543523] ret_from_fork+0x35/0x40 Fixes: a3832b31898f ("mlxsw: core: Create an ordered workqueue for FIB offload") Signed-off-by: Ido Schimmel Reported-by: Semion Lisyansky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 91cd6fa42e9a3..2c0e3281bd7a7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1961,7 +1961,7 @@ static int __init mlxsw_core_module_init(void) mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM; -- GitLab From b442fed1b724af0de087912a5718ddde1b87acbb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: [PATCH 0939/1861] mlxsw: core: Do not use WQ_MEM_RECLAIM for mlxsw workqueue The workqueue is used to periodically update the networking stack about activity / statistics of various objects such as neighbours and TC actions. It should not be called as part of memory reclaim path, so remove the WQ_MEM_RECLAIM flag. Fixes: 3d5479e92087 ("mlxsw: core: Remove deprecated create_workqueue") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 2c0e3281bd7a7..f26a4ca293637 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1958,7 +1958,7 @@ static int __init mlxsw_core_module_init(void) { int err; - mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, -- GitLab From 972fae683cbad5cf348268e76abc6d55cfb3ba87 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:15 +0000 Subject: [PATCH 0940/1861] mlxsw: spectrum_router: Do not check VRF MAC address Commit 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") enabled the driver to veto router interface (RIF) MAC addresses that it cannot support. This check should only be performed for interfaces for which the driver actually configures a RIF. A VRF upper is not one of them, so ignore it. Without this patch it is not possible to set an IP address on the VRF device and use it as a loopback. Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 52fed8c7bf1ed..902e766a8ed33 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6781,7 +6781,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, /* A RIF is not created for macvlan netdevs. Their MAC is used to * populate the FDB */ - if (netif_is_macvlan(dev)) + if (netif_is_macvlan(dev) || netif_is_l3_master(dev)) return 0; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { -- GitLab From 7052e2436373cc2c46981e165d1cbc5023f20dd7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:16 +0000 Subject: [PATCH 0941/1861] selftests: mlxsw: Test VRF MAC vetoing Test that it is possible to set an IP address on a VRF and that it is not vetoed. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index c4cf6e6d800eb..a6c196c8534ce 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -11,6 +11,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" rif_set_addr_test + rif_vrf_set_addr_test rif_inherit_bridge_addr_test rif_non_inherit_bridge_addr_test vlan_interface_deletion_test @@ -98,6 +99,25 @@ rif_set_addr_test() ip link set dev $swp1 addr $swp1_mac } +rif_vrf_set_addr_test() +{ + # Test that it is possible to set an IP address on a VRF upper despite + # its random MAC address. 
+ RET=0 + + ip link add name vrf-test type vrf table 10 + ip link set dev $swp1 master vrf-test + + ip -4 address add 192.0.2.1/24 dev vrf-test + check_err $? "failed to set IPv4 address on VRF" + ip -6 address add 2001:db8:1::1/64 dev vrf-test + check_err $? "failed to set IPv6 address on VRF" + + log_test "RIF - setting IP address on VRF" + + ip link del dev vrf-test +} + rif_inherit_bridge_addr_test() { RET=0 -- GitLab From d5949d92c29ce147a9cb9e21fcf8ad7c1ff327b1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 10 Apr 2019 06:58:17 +0000 Subject: [PATCH 0942/1861] mlxsw: spectrum_buffers: Add a multicast pool for Spectrum-2 In Spectrum-1, when a multicast packet is admitted to the shared buffer it increases the quotas of all the ports and {port, TC} to which it is forwarded to. The above means that multicast packets are accounted multiple times in the shared buffer and can therefore cause the associated shared buffer pool to fill up very quickly. To work around this issue, commit e83c045e53d7 ("mlxsw: spectrum_buffers: Configure MC pool") added a dedicated multicast pool in which multicast packets are accounted. The issue is not present in Spectrum-2, but in order to be backward compatible with Spectrum-1, its default behavior is to allow a multicast packet to increase multiple egress quotas instead of one. Until the new (non-backward compatible) mode is supported, configure a dedicated multicast pool as in Spectrum-1. Fixes: fe099bf682ab ("mlxsw: spectrum_buffers: Add Spectrum-2 shared buffer configuration") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 9a79b5e115974..d633bef5f1051 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -70,6 +70,7 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_EGRESS, 1}, {MLXSW_REG_SBXX_DIR_EGRESS, 2}, {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, }; #define MLXSW_SP_SB_ING_TC_COUNT 8 @@ -428,6 +429,7 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, @@ -517,14 +519,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = { MLXSW_SP_SB_CM(0, 7, 4), MLXSW_SP_SB_CM(0, 7, 4), MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), - MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), MLXSW_SP_SB_CM(1, 0xff, 4), }; @@ -671,6 +673,7 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = { MLXSW_SP_SB_PM(0, 0), MLXSW_SP_SB_PM(0, 0), MLXSW_SP_SB_PM(0, 0), + 
MLXSW_SP_SB_PM(10000, 90000), }; static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) -- GitLab From 71eec083eef10b1529f2300d6c3553f0949733a5 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 4 Apr 2019 09:36:36 -0500 Subject: [PATCH 0943/1861] Documentation: dt: edac: Add Stratix10 Peripheral bindings Add peripheral bindings for Stratix10 EDAC to capture the differences between the ARM64 and ARM32 architecture. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Reviewed-by: Rob Herring Cc: James Morse Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: dinguyen@kernel.org Cc: linux-edac Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1554388597-28019-3-git-send-email-thor.thayer@linux.intel.com --- .../bindings/edac/socfpga-eccmgr.txt | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt index acb211c098c02..8f52206cfd2a1 100644 --- a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt @@ -258,6 +258,49 @@ Required Properties: - compatible : Should be "altr,sdram-edac-s10" - interrupts : Should be single bit error interrupt. +On-Chip RAM ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-ocram-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent OCRAM node. +- interrupts : Should be single bit error interrupt. + +Ethernet FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-eth-mac-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent Ethernet node. +- interrupts : Should be single bit error interrupt. + +NAND FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-nand-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent NAND node. +- interrupts : Should be single bit error interrupt. + +DMA FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-dma-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent DMA node. +- interrupts : Should be single bit error interrupt. + +USB FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-usb-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent USB node. +- interrupts : Should be single bit error interrupt. + +SDMMC FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-sdmmc-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent SD/MMC node. +- interrupts : Should be single bit error interrupt for port A + and then single bit error interrupt for port B. 
+ Example: eccmgr { @@ -274,4 +317,67 @@ Example: compatible = "altr,sdram-edac-s10"; interrupts = <16 IRQ_TYPE_LEVEL_HIGH>; }; + + ocram-ecc@ff8cc000 { + compatible = "altr,socfpga-s10-ocram-ecc"; + reg = ; + altr,ecc-parent = <&ocram>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-rx-ecc@ff8c0000 { + compatible = "altr,socfpga-s10-eth-mac-ecc"; + reg = <0xff8c0000 0x100>; + altr,ecc-parent = <&gmac0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-tx-ecc@ff8c0400 { + compatible = "altr,socfpga-s10-eth-mac-ecc"; + reg = <0xff8c0400 0x100>; + altr,ecc-parent = <&gmac0>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>' + }; + + nand-buf-ecc@ff8c8000 { + compatible = "altr,socfpga-s10-nand-ecc"; + reg = <0xff8c8000 0x100>; + altr,ecc-parent = <&nand>; + interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + }; + + nand-rd-ecc@ff8c8400 { + compatible = "altr,socfpga-s10-nand-ecc"; + reg = <0xff8c8400 0x100>; + altr,ecc-parent = <&nand>; + interrupts = <13 IRQ_TYPE_LEVEL_HIGH>; + }; + + nand-wr-ecc@ff8c8800 { + compatible = "altr,socfpga-s10-nand-ecc"; + reg = <0xff8c8800 0x100>; + altr,ecc-parent = <&nand>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + }; + + dma-ecc@ff8c9000 { + compatible = "altr,socfpga-s10-dma-ecc"; + reg = <0xff8c9000 0x100>; + altr,ecc-parent = <&pdma>; + interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; + + usb0-ecc@ff8c4000 { + compatible = "altr,socfpga-s10-usb-ecc"; + reg = <0xff8c4000 0x100>; + altr,ecc-parent = <&usb0>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + }; + + sdmmc-ecc@ff8c8c00 { + compatible = "altr,socfpga-s10-sdmmc-ecc"; + reg = <0xff8c8c00 0x100>; + altr,ecc-parent = <&mmc>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; + }; }; -- GitLab From 74676a8e247a2163a25fb0ef98f17b74fbad9d21 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 4 Apr 2019 09:36:37 -0500 Subject: [PATCH 0944/1861] arm64: dts: stratix10: Use new Stratix10 EDAC bindings Use the new Stratix10 binding format for EDAC nodes. 
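Listing the old compatible after the new one keeps existing drivers binding while updated drivers can match the more specific string first; schematically, on the driver side (hypothetical excerpt, not the actual altera_edac match table):

  static const struct of_device_id altr_ecc_of_match[] = {
  	{ .compatible = "altr,socfpga-s10-usb-ecc" },	/* preferred */
  	{ .compatible = "altr,socfpga-usb-ecc" },	/* fallback */
  	{ /* sentinel */ }
  };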
Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Acked-by: Dinh Nguyen Cc: James Morse Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: linux-edac Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1554388597-28019-4-git-send-email-thor.thayer@linux.intel.com --- .../boot/dts/altera/socfpga_stratix10.dtsi | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c649f6b14cb6..e00ad770fa394 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -531,11 +531,12 @@ }; eccmgr { - compatible = "altr,socfpga-a10-ecc-manager"; + compatible = "altr,socfpga-s10-ecc-manager", + "altr,socfpga-a10-ecc-manager"; altr,sysmgr-syscon = <&sysmgr>; #address-cells = <1>; #size-cells = <1>; - interrupts = <0 15 4>, <0 95 4>; + interrupts = <0 15 4>; interrupt-controller; #interrupt-cells = <2>; ranges; @@ -543,31 +544,31 @@ sdramedac { compatible = "altr,sdram-edac-s10"; altr,sdr-syscon = <&sdr>; - interrupts = <16 4>, <48 4>; + interrupts = <16 4>; }; usb0-ecc@ff8c4000 { - compatible = "altr,socfpga-usb-ecc"; + compatible = "altr,socfpga-s10-usb-ecc", + "altr,socfpga-usb-ecc"; reg = <0xff8c4000 0x100>; altr,ecc-parent = <&usb0>; - interrupts = <2 4>, - <34 4>; + interrupts = <2 4>; }; emac0-rx-ecc@ff8c0000 { - compatible = "altr,socfpga-eth-mac-ecc"; + compatible = "altr,socfpga-s10-eth-mac-ecc", + "altr,socfpga-eth-mac-ecc"; reg = <0xff8c0000 0x100>; altr,ecc-parent = <&gmac0>; - interrupts = <4 4>, - <36 4>; + interrupts = <4 4>; }; emac0-tx-ecc@ff8c0400 { - compatible = "altr,socfpga-eth-mac-ecc"; + compatible = "altr,socfpga-s10-eth-mac-ecc", + "altr,socfpga-eth-mac-ecc"; reg = <0xff8c0400 0x100>; altr,ecc-parent = <&gmac0>; - interrupts = <5 4>, - <37 4>; + interrupts = <5 4>; }; }; -- GitLab From 1bfb97b9a51901103677a4d1a2386d223c15bc71 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Mar 2019 10:50:05 -0700 Subject: [PATCH 0945/1861] MAINTAINERS: BMIPS: Add internal Broadcom mailing list There is a patchwork instance behind bcm-kernel-feedback-list that is helpful to track submissions, add this list for the MIPS BMIPS entry. Signed-off-by: Florian Fainelli Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41e..92df0eb1aa6c9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3121,6 +3121,7 @@ F: drivers/cpufreq/bmips-cpufreq.c BROADCOM BMIPS MIPS ARCHITECTURE M: Kevin Cernekee M: Florian Fainelli +L: bcm-kernel-feedback-list@broadcom.com L: linux-mips@vger.kernel.org T: git git://github.com/broadcom/stblinux.git S: Maintained -- GitLab From b66b7bd2bdc1a74c46a0a470f9ac19629320d212 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Apr 2019 11:06:59 -0500 Subject: [PATCH 0946/1861] ibmvnic: Enable GRO Enable Generic Receive Offload in the ibmvnic driver. Signed-off-by: Thomas Falcon Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 51cfe95f3e247..cc22c53515139 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3837,7 +3837,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO; + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) adapter->netdev->features |= NETIF_F_IP_CSUM; -- GitLab From dde746a35f8b7da4b9515dd3dc4708a9926fbd65 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Apr 2019 11:07:00 -0500 Subject: [PATCH 0947/1861] ibmvnic: Fix netdev feature clobbering during a reset While determining offload capabilities of backing hardware during a device reset, the driver is clobbering current feature settings. Update hw_features on reset instead of features unless a feature is enabled that is no longer supported on the current backing device. Also enable features that were not supported prior to the reset but were previously enabled or requested by the user. This can occur if the reset is the result of a carrier change, such as a device failover or partition migration. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cc22c53515139..3dfb2d131eb76 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3762,6 +3762,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; + netdev_features_t old_hw_features = 0; union ibmvnic_crq crq; int i; @@ -3837,24 +3838,41 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; + if (adapter->state != VNIC_PROBING) { + old_hw_features = adapter->netdev->hw_features; + adapter->netdev->hw_features = 0; + } + adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) - adapter->netdev->features |= NETIF_F_IP_CSUM; + adapter->netdev->hw_features |= NETIF_F_IP_CSUM; if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) - adapter->netdev->features |= NETIF_F_IPV6_CSUM; + adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; if ((adapter->netdev->features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) - adapter->netdev->features |= NETIF_F_RXCSUM; + adapter->netdev->hw_features |= NETIF_F_RXCSUM; if (buf->large_tx_ipv4) - adapter->netdev->features |= NETIF_F_TSO; + adapter->netdev->hw_features |= NETIF_F_TSO; if (buf->large_tx_ipv6) - adapter->netdev->features |= NETIF_F_TSO6; + adapter->netdev->hw_features |= NETIF_F_TSO6; - adapter->netdev->hw_features |= adapter->netdev->features; + if (adapter->state == VNIC_PROBING) { + adapter->netdev->features |= adapter->netdev->hw_features; + } else if (old_hw_features != adapter->netdev->hw_features) { + netdev_features_t tmp = 0; + + /* disable features no longer supported */ + adapter->netdev->features &= 
adapter->netdev->hw_features; + /* turn on features now supported if previously enabled */ + tmp = (old_hw_features ^ adapter->netdev->hw_features) & + adapter->netdev->hw_features; + adapter->netdev->features |= + tmp & adapter->netdev->wanted_features; + } memset(&crq, 0, sizeof(crq)); crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; -- GitLab From 2a29e9f6b9b499f1fc5f4a48220dc3f4428499f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Apr 2019 21:34:34 +0200 Subject: [PATCH 0948/1861] sparc64/pci_sun4v: fix ATU checks for large DMA masks Now that drivers are allowed to set DMA masks larger than required, we need to be a little more careful in the sun4v PCI iommu driver to choose when to select the ATU support - a larger DMA mask can be set even when the platform does not support ATU, so we always have to check if it is available before using it. Add a little helper for that and use it in all the places where we make ATU usage decisions based on the DMA mask. Fixes: 24132a419c68 ("sparc64/pci_sun4v: allow large DMA masks") Reported-by: Meelis Roos Signed-off-by: Christoph Hellwig Tested-by: Meelis Roos Acked-by: David S. Miller --- arch/sparc/kernel/pci_sun4v.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index a8af6023c1263..14b93c5564e35 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -73,6 +73,11 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns p->npages = 0; } +static inline bool iommu_use_atu(struct iommu *iommu, u64 mask) +{ + return iommu->atu && mask > DMA_BIT_MASK(32); +} + /* Interrupts must be disabled. */ static long iommu_batch_flush(struct iommu_batch *p, u64 mask) { @@ -92,7 +97,7 @@ static long iommu_batch_flush(struct iommu_batch *p, u64 mask) prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE); while (npages != 0) { - if (mask <= DMA_BIT_MASK(32) || !pbm->iommu->atu) { + if (!iommu_use_atu(pbm->iommu, mask)) { num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), npages, @@ -179,7 +184,6 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, unsigned long flags, order, first_page, npages, n; unsigned long prot = 0; struct iommu *iommu; - struct atu *atu; struct iommu_map_table *tbl; struct page *page; void *ret; @@ -205,13 +209,11 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, memset((char *)first_page, 0, PAGE_SIZE << order); iommu = dev->archdata.iommu; - atu = iommu->atu; - mask = dev->coherent_dma_mask; - if (mask <= DMA_BIT_MASK(32) || !atu) + if (!iommu_use_atu(iommu, mask)) tbl = &iommu->tbl; else - tbl = &atu->tbl; + tbl = &iommu->atu->tbl; entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, (unsigned long)(-1), 0); @@ -333,7 +335,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, atu = iommu->atu; devhandle = pbm->devhandle; - if (dvma <= DMA_BIT_MASK(32)) { + if (!iommu_use_atu(iommu, dvma)) { tbl = &iommu->tbl; iotsb_num = 0; /* we don't care for legacy iommu */ } else { @@ -374,7 +376,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, npages >>= IO_PAGE_SHIFT; mask = *dev->dma_mask; - if (mask <= DMA_BIT_MASK(32)) + if (!iommu_use_atu(iommu, mask)) tbl = &iommu->tbl; else tbl = &atu->tbl; @@ -510,7 +512,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; mask = *dev->dma_mask; - if (mask <=
DMA_BIT_MASK(32)) + if (!iommu_use_atu(iommu, mask)) tbl = &iommu->tbl; else tbl = &atu->tbl; -- GitLab From 5a03bc73abed6ae196c15e9950afde19d48be12c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:30 -0700 Subject: [PATCH 0949/1861] net/tls: fix the IV leaks Commit f66de3ee2c16 ("net/tls: Split conf to rx + tx") made freeing of IV and record sequence number conditional to SW path only, but commit e8f69799810c ("net/tls: Add generic NIC offload infrastructure") also allocates that state for the device offload configuration. Remember to free it. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 135a7ee9db034..38b3b2a9835aa 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) + if (ctx->tx_conf == TLS_HW) { kfree(tls_offload_ctx_tx(ctx)); + kfree(ctx->tx.rec_seq); + kfree(ctx->tx.iv); + } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); -- GitLab From 35b71a34ada62c9573847a324bf06a133fe11b11 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:31 -0700 Subject: [PATCH 0950/1861] net/tls: don't leak partially sent record in device mode David reports that tls triggers warnings related to sk->sk_forward_alloc not being zero at destruction time: WARNING: CPU: 5 PID: 6831 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 WARNING: CPU: 5 PID: 6831 at net/ipv4/af_inet.c:160 inet_sock_destruct+0x15b/0x170 When sender fills up the write buffer and dies from SIGPIPE. This is due to the device implementation not cleaning up the partially_sent_record. This is because commit a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") moved the partial record cleanup to the SW-only path. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: David Beckett Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/tls.h | 2 ++ net/tls/tls_device.c | 7 +++++++ net/tls/tls_main.c | 22 ++++++++++++++++++++++ net/tls/tls_sw.c | 15 +-------------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a5a938583295c..c7f7dc344e738 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -307,6 +307,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_device_sk_destruct(struct sock *sk); +void tls_device_free_resources_tx(struct sock *sk); void tls_device_init(void); void tls_device_cleanup(void); int tls_tx_records(struct sock *sk, int flags); @@ -330,6 +331,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int flags); int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 38b3b2a9835aa..9f3bdbc1e5934 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -219,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk) } EXPORT_SYMBOL(tls_device_sk_destruct); +void tls_device_free_resources_tx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_free_partial_record(sk, tls_ctx); +} + static void tls_append_frag(struct tls_record_info *record, struct page_frag *pfrag, int size) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index df921a2904b9b..a3cca1ef00985 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -208,6 +208,26 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, return tls_push_sg(sk, ctx, sg, offset, flags); } +bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx) +{ + struct scatterlist *sg; + + sg = ctx->partially_sent_record; + if (!sg) + return false; + + while (1) { + put_page(sg_page(sg)); + sk_mem_uncharge(sk, sg->length); + + if (sg_is_last(sg)) + break; + sg++; + } + ctx->partially_sent_record = NULL; + return true; +} + static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); @@ -267,6 +287,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); + } else if (ctx->tx_conf == TLS_HW) { + tls_device_free_resources_tx(sk); } if (ctx->rx_conf == TLS_SW) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 20b1912279694..b50ced862f6f9 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2052,20 +2052,7 @@ void tls_sw_free_resources_tx(struct sock *sk) /* Free up un-sent records in tx_list. First, free * the partially sent record if any at head of tx_list. 
*/ - if (tls_ctx->partially_sent_record) { - struct scatterlist *sg = tls_ctx->partially_sent_record; - - while (1) { - put_page(sg_page(sg)); - sk_mem_uncharge(sk, sg->length); - - if (sg_is_last(sg)) - break; - sg++; - } - - tls_ctx->partially_sent_record = NULL; - + if (tls_free_partial_record(sk, tls_ctx)) { rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); list_del(&rec->list); -- GitLab From 4a9c2e3746e6151fd5d077259d79ce9ca86d47d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 11:04:32 -0700 Subject: [PATCH 0951/1861] net: strparser: partially revert "strparser: Call skb_unclone conditionally" This reverts the first part of commit 4e485d06bb8c ("strparser: Call skb_unclone conditionally"). To build a message with multiple fragments we need our own root of frag_list. We can't simply use the frag_list of orig_skb, because it will lead to linking all orig_skbs together creating very long frag chains, and causing stack overflow on kfree_skb() (which is called recursively on the frag_lists). BUG: stack guard page was hit at 00000000d40fad41 (stack is 0000000029dde9f4..000000008cce03d5) kernel stack overflow (double-fault): 0000 [#1] PREEMPT SMP RIP: 0010:free_one_page+0x2b/0x490 Call Trace: __free_pages_ok+0x143/0x2c0 skb_release_data+0x8e/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 [...] skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 ? skb_release_data+0xad/0x140 kfree_skb+0x32/0xb0 skb_release_data+0xad/0x140 __kfree_skb+0xe/0x20 tcp_disconnect+0xd6/0x4d0 tcp_close+0xf4/0x430 ? tcp_check_oom+0xf0/0xf0 tls_sk_proto_close+0xe4/0x1e0 [tls] inet_release+0x36/0x60 __sock_release+0x37/0xa0 sock_close+0x11/0x20 __fput+0xa2/0x1d0 task_work_run+0x89/0xb0 exit_to_usermode_loop+0x9a/0xa0 do_syscall_64+0xc0/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Let's leave the second unclone conditional, as I'm not entirely sure what is its purpose :) Fixes: 4e485d06bb8c ("strparser: Call skb_unclone conditionally") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/strparser/strparser.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 860dcfb95ee47..fa6c977b4c41a 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ - if (skb_has_frag_list(head)) { - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - STRP_STATS_INCR(strp->stats.mem_fail); - desc->error = err; - return 0; - } + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.mem_fail); + desc->error = err; + return 0; } if (unlikely(skb_shinfo(head)->frag_list)) { -- GitLab From 7e94a7b659eefedda82cde97229a26f319fb1182 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 4 Apr 2019 13:11:28 +0200 Subject: [PATCH 0952/1861] x86/microcode/intel: Refactor Intel microcode blob loading Change generic_load_microcode() to use the iov_iter API instead of a clumsy open-coded version which has to pay attention to __user data or kernel data, depending on the loading method. 
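As a rough sketch of the resulting pattern (simplified; kbuf, ubuf and len are placeholder names, while the iov_iter calls themselves match the diff below):

	struct iov_iter iter;

	/* a kernel buffer, as in the firmware loader path: */
	struct kvec kvec = { .iov_base = kbuf, .iov_len = len };
	iov_iter_kvec(&iter, WRITE, &kvec, 1, len);

	/* or a user buffer, as in the /dev/cpu/microcode path: */
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	/* either way, the parser then reads through one primitive: */
	if (!copy_from_iter_full(&mc_header, sizeof(mc_header), &iter))
		return -EFAULT;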
This allows to avoid explicit casting between user and kernel pointers. Because the iov_iter API makes it hard to read the same location twice, as a side effect, also fix a double-read of the microcode header (which could e.g. lead to out-of-bounds reads in microcode_sanity_check()). Not that it matters much, only root is allowed to load microcode anyway... [ bp: Massage a bit, sort function-local variables. ] Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: x86-ml Link: https://lkml.kernel.org/r/20190404111128.131157-1-jannh@google.com --- arch/x86/kernel/cpu/microcode/intel.c | 71 ++++++++++++++------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 16936a24795c8..a44bdbe7c55eb 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -861,32 +862,33 @@ out: return ret; } -static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, - int (*get_ucode_data)(void *, const void *, size_t)) +static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; - int new_rev = uci->cpu_sig.rev; - unsigned int leftover = size; unsigned int curr_mc_size = 0, new_mc_size = 0; - unsigned int csig, cpf; enum ucode_state ret = UCODE_OK; + int new_rev = uci->cpu_sig.rev; + u8 *new_mc = NULL, *mc = NULL; + unsigned int csig, cpf; - while (leftover) { + while (iov_iter_count(iter)) { struct microcode_header_intel mc_header; - unsigned int mc_size; + unsigned int mc_size, data_size; + u8 *data; - if (leftover < sizeof(mc_header)) { - pr_err("error! Truncated header in microcode data file\n"); + if (!copy_from_iter_full(&mc_header, sizeof(mc_header), iter)) { + pr_err("error! Truncated or inaccessible header in microcode data file\n"); break; } - if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header))) - break; - mc_size = get_totalsize(&mc_header); - if (!mc_size || mc_size > leftover) { - pr_err("error! Bad data in microcode data file\n"); + if (mc_size < sizeof(mc_header)) { + pr_err("error! Bad data in microcode data file (totalsize too small)\n"); + break; + } + data_size = mc_size - sizeof(mc_header); + if (data_size > iov_iter_count(iter)) { + pr_err("error! 
Bad data in microcode data file (truncated file?)\n"); break; } @@ -899,7 +901,9 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, curr_mc_size = mc_size; } - if (get_ucode_data(mc, ucode_ptr, mc_size) || + memcpy(mc, &mc_header, sizeof(mc_header)); + data = mc + sizeof(mc_header); + if (!copy_from_iter_full(data, data_size, iter) || microcode_sanity_check(mc, 1) < 0) { break; } @@ -914,14 +918,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, mc = NULL; /* trigger new vmalloc */ ret = UCODE_NEW; } - - ucode_ptr += mc_size; - leftover -= mc_size; } vfree(mc); - if (leftover) { + if (iov_iter_count(iter)) { vfree(new_mc); return UCODE_ERROR; } @@ -945,12 +946,6 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, return ret; } -static int get_ucode_fw(void *to, const void *from, size_t n) -{ - memcpy(to, from, n); - return 0; -} - static bool is_blacklisted(unsigned int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -977,10 +972,12 @@ static bool is_blacklisted(unsigned int cpu) static enum ucode_state request_microcode_fw(int cpu, struct device *device, bool refresh_fw) { - char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; + struct iov_iter iter; enum ucode_state ret; + struct kvec kvec; + char name[30]; if (is_blacklisted(cpu)) return UCODE_NFOUND; @@ -993,26 +990,30 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, return UCODE_NFOUND; } - ret = generic_load_microcode(cpu, (void *)firmware->data, - firmware->size, &get_ucode_fw); + kvec.iov_base = (void *)firmware->data; + kvec.iov_len = firmware->size; + iov_iter_kvec(&iter, WRITE, &kvec, 1, firmware->size); + ret = generic_load_microcode(cpu, &iter); release_firmware(firmware); return ret; } -static int get_ucode_user(void *to, const void *from, size_t n) -{ - return copy_from_user(to, from, n); -} - static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { + struct iov_iter iter; + struct iovec iov; + if (is_blacklisted(cpu)) return UCODE_NFOUND; - return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); + iov.iov_base = (void __user *)buf; + iov.iov_len = size; + iov_iter_init(&iter, WRITE, &iov, 1, size); + + return generic_load_microcode(cpu, &iter); } static struct microcode_ops microcode_intel_ops = { -- GitLab From 24613a04ad1c0588c10f4b5403ca60a73d164051 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Apr 2019 22:14:07 +0200 Subject: [PATCH 0953/1861] x86/microcode: Fix the ancient deprecated microcode loading method Commit 2613f36ed965 ("x86/microcode: Attempt late loading only when new microcode is present") added the new define UCODE_NEW to denote that an update should happen only when newer microcode (than installed on the system) has been found. But it missed adjusting that for the old /dev/cpu/microcode loading interface. Fix it. 
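For reference, the states involved are roughly the following (a sketch of the enum as used by this series, not a verbatim copy):

	enum ucode_state {
		UCODE_OK	= 0,	/* nothing newer than what is loaded */
		UCODE_NEW,		/* newer microcode found, not yet applied */
		UCODE_UPDATED,
		UCODE_NFOUND,
		UCODE_ERROR,
	};

so testing for UCODE_OK made the old interface apply microcode precisely when there was nothing new to apply, and skip it when there was.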
Fixes: 2613f36ed965 ("x86/microcode: Attempt late loading only when new microcode is present") Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Jann Horn Link: https://lkml.kernel.org/r/20190405133010.24249-3-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5260185cbf7ba..8a4a7823451ac 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -418,8 +418,9 @@ static int do_microcode_update(const void __user *buf, size_t size) if (ustate == UCODE_ERROR) { error = -1; break; - } else if (ustate == UCODE_OK) + } else if (ustate == UCODE_NEW) { apply_microcode_on_target(cpu); + } } return error; -- GitLab From c02f48e070bde326f55bd94544ca82291f7396e3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Apr 2019 06:28:11 +0200 Subject: [PATCH 0954/1861] x86/microcode: Deprecate MICROCODE_OLD_INTERFACE This is the ancient loading interface which requires special tools to be used. The bigger problem with it is that it is as inadequate for proper loading of microcode as the late microcode loading interface is because it happens just as late. iucode_tool's manpage already warns people that it is deprecated. Deprecate it and turn it off by default along with a big fat warning in the Kconfig help. It will go away sometime in the future. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20190405133010.24249-4-bp@alien8.de --- arch/x86/Kconfig | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19c..5a0a752f3ddd7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1330,8 +1330,16 @@ config MICROCODE_AMD processors will be enabled. config MICROCODE_OLD_INTERFACE - def_bool y + bool "Ancient loading interface (DEPRECATED)" + default n depends on MICROCODE + ---help--- + DO NOT USE THIS! This is the ancient /dev/cpu/microcode interface + which was used by userspace tools like iucode_tool and microcode.ctl. + It is inadequate because it runs too late to be able to properly + load microcode on a machine and it needs special tools. Instead, you + should've switched to the early loading method with the initrd or + builtin microcode by now: Documentation/x86/microcode.txt config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" -- GitLab From 3943af9d01e94330d0cfac6fccdbc829aad50c92 Mon Sep 17 00:00:00 2001 From: Sergey Miroshnichenko Date: Tue, 12 Mar 2019 15:05:48 +0300 Subject: [PATCH 0955/1861] PCI: pciehp: Ignore Link State Changes after powering off a slot During a safe hot remove, the OS powers off the slot, which may cause a Data Link Layer State Changed event. The slot has already been set to OFF_STATE, so that event results in re-enabling the device, making it impossible to safely remove it. Clear out the Presence Detect Changed and Data Link Layer State Changed events when the disabled slot has settled down. It is still possible to re-enable the device if it remains in the slot after pressing the Attention Button by pressing it again. Fixes the problem that Micah reported below: an NVMe drive power button may not actually turn off the drive. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=203237 Reported-by: Micah Parrish Tested-by: Micah Parrish Signed-off-by: Sergey Miroshnichenko [bhelgaas: changelog, add bugzilla URL] Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org # v4.19+ --- drivers/pci/hotplug/pciehp_ctrl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 3f3df4c29f6e1..905282a8ddaae 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -115,6 +115,10 @@ static void remove_board(struct controller *ctrl, bool safe_removal) * removed from the slot/adapter. */ msleep(1000); + + /* Ignore link or presence changes caused by power off */ + atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC), + &ctrl->pending_events); } /* turn off Green LED */ -- GitLab From a3761c3c91209b58b6f33bf69dd8bb8ec0c9d925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Wed, 10 Apr 2019 16:27:51 -0400 Subject: [PATCH 0956/1861] block: do not leak memory in bio_copy_user_iov() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When bio_add_pc_page() fails in bio_copy_user_iov() we should free the page we just allocated otherwise we are leaking it. Cc: linux-block@vger.kernel.org Cc: Linus Torvalds Cc: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jérôme Glisse Signed-off-by: Jens Axboe --- block/bio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index b64cedc7f87cf..716510ecd7ffa 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1298,8 +1298,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q, } } - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) + if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { + if (!map_data) + __free_page(page); break; + } len -= bytes; offset = 0; -- GitLab From 7c2e07130090ae001a97a6b65597830d6815e93e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20M=C3=BCller?= Date: Mon, 8 Apr 2019 15:33:54 +0200 Subject: [PATCH 0957/1861] clk: x86: Add system specific quirk to mark clocks as critical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL"), the pmc_plt_clocks of the Bay Trail SoC are unconditionally gated off. Unfortunately this will break systems where these clocks are used for external purposes beyond the kernel's knowledge. Fix it by implementing a system specific quirk to mark the necessary pmc_plt_clks as critical. 
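For background, CLK_IS_CRITICAL makes the clk core take its own prepare/enable reference when the clock is registered, so ordinary consumer gating can no longer switch the clock off; schematically (mirroring this driver's registration path):

	if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw))
		init.flags |= CLK_IS_CRITICAL;
	ret = devm_clk_hw_register(&pdev->dev, &pclk->hw);
	/* later clk_disable_unprepare() calls by consumers drop only
	 * their own references; the core's reference keeps it running */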
Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") Signed-off-by: David Müller Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- drivers/clk/x86/clk-pmc-atom.c | 14 ++++++++++--- drivers/platform/x86/pmc_atom.c | 21 +++++++++++++++++++ .../linux/platform_data/x86/clk-pmc-atom.h | 3 +++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c index d977193842dfe..19174835693b9 100644 --- a/drivers/clk/x86/clk-pmc-atom.c +++ b/drivers/clk/x86/clk-pmc-atom.c @@ -165,7 +165,7 @@ static const struct clk_ops plt_clk_ops = { }; static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, - void __iomem *base, + const struct pmc_clk_data *pmc_data, const char **parent_names, int num_parents) { @@ -184,9 +184,17 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id, init.num_parents = num_parents; pclk->hw.init = &init; - pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; + pclk->reg = pmc_data->base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE; spin_lock_init(&pclk->lock); + /* + * On some systems, the pmc_plt_clocks already enabled by the + * firmware are being marked as critical to avoid them being + * gated by the clock framework. + */ + if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw)) + init.flags |= CLK_IS_CRITICAL; + ret = devm_clk_hw_register(&pdev->dev, &pclk->hw); if (ret) { pclk = ERR_PTR(ret); @@ -332,7 +340,7 @@ static int plt_clk_probe(struct platform_device *pdev) return PTR_ERR(parent_names); for (i = 0; i < PMC_CLK_NUM; i++) { - data->clks[i] = plt_clk_register(pdev, i, pmc_data->base, + data->clks[i] = plt_clk_register(pdev, i, pmc_data, parent_names, data->nparents); if (IS_ERR(data->clks[i])) { err = PTR_ERR(data->clks[i]); diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 8f018b3f3cd4c..eaec2d306481c 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -391,11 +392,27 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) } #endif /* CONFIG_DEBUG_FS */ +/* + * Some systems need one or more of their pmc_plt_clks to be + * marked as critical. 
+ */ +static const struct dmi_system_id critclk_systems[] __initconst = { + { + .ident = "MPL CEC1x", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"), + }, + }, + { /*sentinel*/ } +}; + static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap, const struct pmc_data *pmc_data) { struct platform_device *clkdev; struct pmc_clk_data *clk_data; + const struct dmi_system_id *d = dmi_first_match(critclk_systems); clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); if (!clk_data) @@ -403,6 +420,10 @@ static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap, clk_data->base = pmc_regmap; /* offset is added by client */ clk_data->clks = pmc_data->clks; + if (d) { + clk_data->critical = true; + pr_info("%s critclks quirk enabled\n", d->ident); + } clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom", PLATFORM_DEVID_NONE, diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h index 3ab892208343c..7a37ac27d0fb2 100644 --- a/include/linux/platform_data/x86/clk-pmc-atom.h +++ b/include/linux/platform_data/x86/clk-pmc-atom.h @@ -35,10 +35,13 @@ struct pmc_clk { * * @base: PMC clock register base offset * @clks: pointer to set of registered clocks, typically 0..5 + * @critical: flag to indicate if firmware enabled pmc_plt_clks + * should be marked as critial or not */ struct pmc_clk_data { void __iomem *base; const struct pmc_clk *clks; + bool critical; }; #endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */ -- GitLab From 903f1a187776bb8d79b13618ec05b25f86318885 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Apr 2019 16:23:39 -0700 Subject: [PATCH 0958/1861] net/tls: fix build without CONFIG_TLS_DEVICE buildbot noticed that TLS_HW is not defined if CONFIG_TLS_DEVICE=n. Wrap the cleanup branch into an ifdef, tls_device_free_resources_tx() wouldn't be compiled either in this case. Fixes: 35b71a34ada6 ("net/tls: don't leak partially sent record in device mode") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a3cca1ef00985..9547cea0ce3b0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -287,8 +287,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_free_resources_tx(sk); +#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); +#endif } if (ctx->rx_conf == TLS_SW) { -- GitLab From 43c2adb9df7ddd6560fd3546d925b42cef92daa0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 8 Apr 2019 16:45:17 +0800 Subject: [PATCH 0959/1861] team: set slave to promisc if team is already in promisc mode After adding a team interface to bridge, the team interface will enter promisc mode. Then if we add a new slave to team0, the slave will keep promisc off. Fix it by setting slave to promisc on if team master is already in promisc mode, also do the same for allmulti. v2: add promisc and allmulti checking when delete ports Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Hangbin Liu Signed-off-by: David S. 
Miller --- drivers/net/team/team.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6ed96fdfd96dd..9ce61b019aadb 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1246,6 +1246,23 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_option_port_add; } + /* set promiscuity level to new slave */ + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(port_dev, 1); + if (err) + goto err_set_slave_promisc; + } + + /* set allmulti level to new slave */ + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(port_dev, 1); + if (err) { + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + goto err_set_slave_promisc; + } + } + netif_addr_lock_bh(dev); dev_uc_sync_multiple(port_dev, dev); dev_mc_sync_multiple(port_dev, dev); @@ -1262,6 +1279,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return 0; +err_set_slave_promisc: + __team_option_inst_del_port(team, port); + err_option_port_add: team_upper_dev_unlink(team, port); @@ -1307,6 +1327,12 @@ static int team_port_del(struct team *team, struct net_device *port_dev) team_port_disable(team, port); list_del_rcu(&port->list); + + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(port_dev, -1); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(port_dev, -1); + team_upper_dev_unlink(team, port); netdev_rx_handler_unregister(port_dev); team_port_disable_netpoll(port); -- GitLab From cd7879f79f83aec4bb13f0f823f323911dc5397b Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 10 Apr 2019 12:16:33 +0800 Subject: [PATCH 0960/1861] drm/i915/gvt: Round up fb->height to tile height when calculating fb->size When fb is tiled and fb->height isn't a multiple of the tile's height, the formula fb->size = fb->stride * fb->height yields a smaller size than the actual size, since the memory height of a tiled fb must be a multiple of the tile's height.
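A quick worked example with made-up numbers: for an X-tiled plane (tile height 8 rows) with stride 4096 and height 100,

	size = (4096 * 100 + PAGE_SIZE - 1) >> PAGE_SHIFT;		/* 100 pages, misses the last partial tile row */
	size = (4096 * roundup(100, 8) + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* 104 pages, covers the full tile rows */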
Fixes: 7f1a93b1f1d1 ("drm/i915/gvt: Correct the calculation of plane size") Reviewed-by: Zhenyu Wang Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 5d887f7cc0d5c..69a9a1b2ea4ac 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -209,7 +209,7 @@ static int vgpu_get_plane_info(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_vgpu_primary_plane_format p; struct intel_vgpu_cursor_plane_format c; - int ret; + int ret, tile_height = 1; if (plane_id == DRM_PLANE_TYPE_PRIMARY) { ret = intel_vgpu_decode_primary_plane(vgpu, &p); @@ -228,12 +228,15 @@ static int vgpu_get_plane_info(struct drm_device *dev, break; case PLANE_CTL_TILED_X: info->drm_format_mod = I915_FORMAT_MOD_X_TILED; + tile_height = 8; break; case PLANE_CTL_TILED_Y: info->drm_format_mod = I915_FORMAT_MOD_Y_TILED; + tile_height = 32; break; case PLANE_CTL_TILED_YF: info->drm_format_mod = I915_FORMAT_MOD_Yf_TILED; + tile_height = 32; break; default: gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled); @@ -264,8 +267,8 @@ static int vgpu_get_plane_info(struct drm_device *dev, return -EINVAL; } - info->size = (info->stride * info->height + PAGE_SIZE - 1) - >> PAGE_SHIFT; + info->size = (info->stride * roundup(info->height, tile_height) + + PAGE_SIZE - 1) >> PAGE_SHIFT; if (info->size == 0) { gvt_vgpu_err("fb size is zero\n"); return -EINVAL; -- GitLab From 8065a779f17e94536a1c4dcee4f9d88011672f97 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 8 Apr 2019 19:45:27 -0400 Subject: [PATCH 0961/1861] failover: allow name change on IFF_UP slave interfaces When a netdev appears through hot plug then gets enslaved by a failover master that is already up and running, the slave will be opened right away after getting enslaved. Today there's a race that userspace (udev) may fail to rename the slave if the kernel (net_failover) opens the slave earlier than when the userspace rename happens. Unlike bond or team, the primary slave of failover can't be renamed by userspace ahead of time, since the kernel initiated auto-enslavement is unable to, or rather, is never meant to be synchronized with the rename request from userspace. As the failover slave interfaces are not designed to be operated directly by userspace apps: IP configuration, filter rules with regard to network traffic passing and etc., should all be done on master interface. In general, userspace apps only care about the name of master interface, while slave names are less important as long as admin users can see reliable names that may carry other information describing the netdev. For e.g., they can infer that "ens3nsby" is a standby slave of "ens3", while for a name like "eth0" they can't tell which master it belongs to. Historically the name of IFF_UP interface can't be changed because there might be admin script or management software that is already relying on such behavior and assumes that the slave name can't be changed once UP. But failover is special: with the in-kernel auto-enslavement mechanism, the userspace expectation for device enumeration and bring-up order is already broken. Previously initramfs and various userspace config tools were modified to bypass failover slaves because of auto-enslavement and duplicate MAC address. 
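To make the failure mode concrete (hypothetical interface names), the rename udev issues used to bounce off the IFF_UP check once the slave had been auto-opened:

	# ip link set dev eth0 name ens3nsby
	RTNETLINK answers: Device or resource busy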
Similarly, in case that users care about seeing reliable slave name, the new type of failover slaves needs to be taken care of specifically in userspace anyway. It's less risky to lift up the rename restriction on failover slave which is already UP. Although it's possible this change may potentially break userspace component (most likely configuration scripts or management software) that assumes slave name can't be changed while UP, it's relatively a limited and controllable set among all userspace components, which can be fixed specifically to listen for the rename events on failover slaves. Userspace component interacting with slaves is expected to be changed to operate on failover master interface instead, as the failover slave is dynamic in nature which may come and go at any point. The goal is to make the role of failover slaves less relevant, and userspace components should only deal with failover master in the long run. Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module") Signed-off-by: Si-Wei Liu Reviewed-by: Liran Alon Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 16 +++++++++++++++- net/core/failover.c | 6 +++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 26f69cf763f43..324e872c91d15 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1500,6 +1500,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device + * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1532,6 +1533,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1563,6 +1565,7 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER +#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK /** * struct net_device - The DEVICE structure. diff --git a/net/core/dev.c b/net/core/dev.c index fdcff29df9158..f409406254ddf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname) BUG_ON(!dev_net(dev)); net = dev_net(dev); - if (dev->flags & IFF_UP) + + /* Some auto-enslaved devices e.g. failover slaves are + * special, as userspace might rename the device after + * the interface had been brought up and running since + * the point kernel initiated auto-enslavement. Allow + * live name change even when these slave devices are + * up and running. + * + * Typically, users of these auto-enslaving devices + * don't actually care about slave name change, as + * they are supposed to operate on master interface + * directly. 
+ */ + if (dev->flags & IFF_UP && + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) return -EBUSY; write_seqcount_begin(&devnet_rename_seq); diff --git a/net/core/failover.c b/net/core/failover.c index 4a92a98ccce9a..b5cd3c727285d 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev) goto err_upper_link; } - slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; + slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); err_upper_link: netdev_rx_handler_unregister(slave_dev); done: @@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_rx_handler_unregister(slave_dev); netdev_upper_dev_unlink(slave_dev, failover_dev); - slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; + slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK); if (fops && fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) -- GitLab From b4f47f3848eb70986f75d06112af7b48b7f5f462 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 8 Apr 2019 17:59:50 -0700 Subject: [PATCH 0962/1861] net/tls: prevent bad memory access in tls_is_sk_tx_device_offloaded() Unlike the '&&' operator, the '&' does not have short-circuit evaluation semantics. IOW both sides of the operator always get evaluated. Fix the wrong operator in tls_is_sk_tx_device_offloaded(), which would lead to out-of-bounds access for non-full sockets. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index c7f7dc344e738..5934246b2c6f4 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -381,7 +381,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { #ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) & + return sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else -- GitLab From 813dbeb656d6c90266f251d8bd2b02d445afa63f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 9 Apr 2019 12:10:25 +0800 Subject: [PATCH 0963/1861] vhost: reject zero size iova range We used to accept a zero size iova range, which will lead to an infinite loop in translate_desc(). Fix this by failing the request in this case. Reported-by: syzbot+d21e6e297322a900c128@syzkaller.appspotmail.com Fixes: 6b1e6cc7 ("vhost: new device IOTLB API") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S.
Miller --- drivers/vhost/vhost.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5ace833de7462..351af88231ada 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) { - struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC); + struct vhost_umem_node *tmp, *node; + if (!size) + return -EFAULT; + + node = kmalloc(sizeof(*node), GFP_ATOMIC); if (!node) return -ENOMEM; -- GitLab From d1841533e54876f152a30ac398a34f47ad6590b1 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 9 Apr 2019 14:59:24 +0700 Subject: [PATCH 0964/1861] tipc: missing entries in name table of publications When binding multiple services with specific types (1Ki, 2Ki, ...), some entries in the name table of publications go missing when listed via 'tipc name show'. The problem is how a zero last_type provided via netlink is identified: zero is both the initial 'type' used when name table dumping starts and the type of the node state service. As a result, the lookup fails to find the node state service type in the next iteration. To solve this, add a further condition to tell the two cases apart and look up the correct service type for the next iteration, instead of treating zero as the initial 'type'. Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/name_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bff241f035250..89993afe0fbd3 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, for (; i < TIPC_NAMETBL_SIZE; i++) { head = &tn->nametbl->services[i]; - if (*last_type) { + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { service = tipc_service_find(net, *last_type); if (!service) return -EPIPE; -- GitLab From 988dc4a9a3b66be75b30405a5494faf0dc7cffb6 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 9 Apr 2019 11:47:20 +0200 Subject: [PATCH 0965/1861] net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv gue tunnels run iptunnel_pull_offloads on received skbs. This can lead to a use-after-free when accessing the guehdr pointer, since the packet will be 'uncloned' by pskb_expand_head if it is a cloned gso skb (e.g. if the packet has been sent through a veth device) Fixes: a09a4c8dd1ec ("tunnels: Remove encapsulation offloads on decap") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S.
Miller --- net/ipv4/fou.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7f..12ce6c526d72b 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); -- GitLab From e154592a1d25fa1f50ac1bd8d132d0e1103442ba Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 11 Apr 2019 00:47:46 +0200 Subject: [PATCH 0966/1861] gpu: host1x: Fix compile error when IOMMU API is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case the IOMMU API is not available compiling host1x fails with the following error: In file included from drivers/gpu/host1x/hw/host1x06.c:27: drivers/gpu/host1x/hw/channel_hw.c: In function ‘host1x_channel_set_streamid’: drivers/gpu/host1x/hw/channel_hw.c:118:30: error: implicit declaration of function ‘dev_iommu_fwspec_get’; did you mean ‘iommu_fwspec_free’? [-Werror=implicit-function-declaration] struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); ^~~~~~~~~~~~~~~~~~~~ iommu_fwspec_free Fixes: de5469c21ff9 ("gpu: host1x: Program the channel stream ID") Signed-off-by: Stefan Agner Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 27101c04a8272..4030d64916f00 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -114,7 +114,7 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) static void host1x_channel_set_streamid(struct host1x_channel *channel) { -#if HOST1X_HW >= 6 +#if IS_ENABLED(CONFIG_IOMMU_API) && HOST1X_HW >= 6 struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f; -- GitLab From 5c41ea6d52003b5bc77c2a82fd5ca7d480237d89 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Thu, 11 Apr 2019 14:29:37 +0530 Subject: [PATCH 0967/1861] mmc: sdhci-omap: Don't finish_mrq() on a command error during tuning commit 5b0d62108b46 ("mmc: sdhci-omap: Add platform specific reset callback") skips data resets during tuning operation. Because of this, a data error or data finish interrupt might still arrive after a command error has been handled and the mrq ended. This ends up with a "mmc0: Got data interrupt 0x00000002 even though no data operation was in progress" error message. Fix this by adding a platform specific callback for sdhci_irq. Mark the mrq as a failure but wait for a data interrupt instead of calling finish_mrq(). 
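The ->irq host op used for this lets a platform driver inspect and filter the interrupt status before the generic sdhci handler runs; schematically (foo_irq is a hypothetical name):

	static u32 foo_irq(struct sdhci_host *host, u32 intmask)
	{
		/* handle or ack platform-specific conditions here ... */
		return intmask;	/* bits still set are handled generically */
	}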
Fixes: 5b0d62108b46 ("mmc: sdhci-omap: Add platform specific reset callback") Signed-off-by: Faiz Abbas Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-omap.c | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 5bbed477c9b1e..9f20fff9781b0 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -797,6 +797,43 @@ void sdhci_omap_reset(struct sdhci_host *host, u8 mask) sdhci_reset(host, mask); } +#define CMD_ERR_MASK (SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX |\ + SDHCI_INT_TIMEOUT) +#define CMD_MASK (CMD_ERR_MASK | SDHCI_INT_RESPONSE) + +static u32 sdhci_omap_irq(struct sdhci_host *host, u32 intmask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + + if (omap_host->is_tuning && host->cmd && !host->data_early && + (intmask & CMD_ERR_MASK)) { + + /* + * Since we are not resetting data lines during tuning + * operation, data error or data complete interrupts + * might still arrive. Mark this request as a failure + * but still wait for the data interrupt + */ + if (intmask & SDHCI_INT_TIMEOUT) + host->cmd->error = -ETIMEDOUT; + else + host->cmd->error = -EILSEQ; + + host->cmd = NULL; + + /* + * Sometimes command error interrupts and command complete + * interrupt will arrive together. Clear all command related + * interrupts here. + */ + sdhci_writel(host, intmask & CMD_MASK, SDHCI_INT_STATUS); + intmask &= ~CMD_MASK; + } + + return intmask; +} + static struct sdhci_ops sdhci_omap_ops = { .set_clock = sdhci_omap_set_clock, .set_power = sdhci_omap_set_power, @@ -807,6 +844,7 @@ static struct sdhci_ops sdhci_omap_ops = { .platform_send_init_74_clocks = sdhci_omap_init_74_clocks, .reset = sdhci_omap_reset, .set_uhs_signaling = sdhci_omap_set_uhs_signaling, + .irq = sdhci_omap_irq, }; static int sdhci_omap_set_capabilities(struct sdhci_omap_host *omap_host) -- GitLab From 0cde62a46e88499cf3f62e3682248a3489488f08 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 10 Apr 2019 14:01:06 +0100 Subject: [PATCH 0968/1861] docs/memory-barriers.txt: Fix style, spacing and grammar in I/O section Commit 4614bbdee357 ("docs/memory-barriers.txt: Rewrite "KERNEL I/O BARRIER EFFECTS" section") rewrote the I/O ordering section of memory-barriers.txt. Subsequently, Ingo noticed a number of issues with the style, spacing and grammar of the rewritten section. Fix them based on his suggestions. Link: https://lkml.kernel.org/r/20190410105833.GA116161@gmail.com Fixes: 4614bbdee357 ("docs/memory-barriers.txt: Rewrite "KERNEL I/O BARRIER EFFECTS" section") Reported-by: Ingo Molnar Acked-by: Ingo Molnar Signed-off-by: Will Deacon --- Documentation/memory-barriers.txt | 124 ++++++++++++++++-------------- 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 3522f0cc772f9..1660dde75e14f 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -2517,80 +2517,88 @@ guarantees: (*) readX(), writeX(): - The readX() and writeX() MMIO accessors take a pointer to the peripheral - being accessed as an __iomem * parameter. For pointers mapped with the - default I/O attributes (e.g. those returned by ioremap()), then the - ordering guarantees are as follows: - - 1. 
All readX() and writeX() accesses to the same peripheral are ordered - with respect to each other. For example, this ensures that MMIO register - writes by the CPU to a particular device will arrive in program order. - - 2. A writeX() by the CPU to the peripheral will first wait for the - completion of all prior CPU writes to memory. For example, this ensures - that writes by the CPU to an outbound DMA buffer allocated by - dma_alloc_coherent() will be visible to a DMA engine when the CPU writes - to its MMIO control register to trigger the transfer. - - 3. A readX() by the CPU from the peripheral will complete before any - subsequent CPU reads from memory can begin. For example, this ensures - that reads by the CPU from an incoming DMA buffer allocated by - dma_alloc_coherent() will not see stale data after reading from the DMA - engine's MMIO status register to establish that the DMA transfer has - completed. - - 4. A readX() by the CPU from the peripheral will complete before any - subsequent delay() loop can begin execution. For example, this ensures - that two MMIO register writes by the CPU to a peripheral will arrive at - least 1us apart if the first write is immediately read back with readX() - and udelay(1) is called prior to the second writeX(). - - __iomem pointers obtained with non-default attributes (e.g. those returned - by ioremap_wc()) are unlikely to provide many of these guarantees. + The readX() and writeX() MMIO accessors take a pointer to the + peripheral being accessed as an __iomem * parameter. For pointers + mapped with the default I/O attributes (e.g. those returned by + ioremap()), the ordering guarantees are as follows: + + 1. All readX() and writeX() accesses to the same peripheral are ordered + with respect to each other. This ensures that MMIO register writes by + the CPU to a particular device will arrive in program order. + + 2. A writeX() by the CPU to the peripheral will first wait for the + completion of all prior CPU writes to memory. This ensures that + writes by the CPU to an outbound DMA buffer allocated by + dma_alloc_coherent() will be visible to a DMA engine when the CPU + writes to its MMIO control register to trigger the transfer. + + 3. A readX() by the CPU from the peripheral will complete before any + subsequent CPU reads from memory can begin. This ensures that reads + by the CPU from an incoming DMA buffer allocated by + dma_alloc_coherent() will not see stale data after reading from the + DMA engine's MMIO status register to establish that the DMA transfer + has completed. + + 4. A readX() by the CPU from the peripheral will complete before any + subsequent delay() loop can begin execution. This ensures that two + MMIO register writes by the CPU to a peripheral will arrive at least + 1us apart if the first write is immediately read back with readX() + and udelay(1) is called prior to the second writeX(): + + writel(42, DEVICE_REGISTER_0); // Arrives at the device... + readl(DEVICE_REGISTER_0); + udelay(1); + writel(42, DEVICE_REGISTER_1); // ...at least 1us before this. + + The ordering properties of __iomem pointers obtained with non-default + attributes (e.g. those returned by ioremap_wc()) are specific to the + underlying architecture and therefore the guarantees listed above cannot + generally be relied upon for accesses to these types of mappings. (*) readX_relaxed(), writeX_relaxed(): - These are similar to readX() and writeX(), but provide weaker memory - ordering guarantees. 
Specifically, they do not guarantee ordering with - respect to normal memory accesses or delay() loops (i.e bullets 2-4 above) - but they are still guaranteed to be ordered with respect to other accesses - to the same peripheral when operating on __iomem pointers mapped with the - default I/O attributes. + These are similar to readX() and writeX(), but provide weaker memory + ordering guarantees. Specifically, they do not guarantee ordering with + respect to normal memory accesses or delay() loops (i.e. bullets 2-4 + above) but they are still guaranteed to be ordered with respect to other + accesses to the same peripheral when operating on __iomem pointers + mapped with the default I/O attributes. (*) readsX(), writesX(): - The readsX() and writesX() MMIO accessors are designed for accessing - register-based, memory-mapped FIFOs residing on peripherals that are not - capable of performing DMA. Consequently, they provide only the ordering - guarantees of readX_relaxed() and writeX_relaxed(), as documented above. + The readsX() and writesX() MMIO accessors are designed for accessing + register-based, memory-mapped FIFOs residing on peripherals that are not + capable of performing DMA. Consequently, they provide only the ordering + guarantees of readX_relaxed() and writeX_relaxed(), as documented above. (*) inX(), outX(): - The inX() and outX() accessors are intended to access legacy port-mapped - I/O peripherals, which may require special instructions on some - architectures (notably x86). The port number of the peripheral being - accessed is passed as an argument. + The inX() and outX() accessors are intended to access legacy port-mapped + I/O peripherals, which may require special instructions on some + architectures (notably x86). The port number of the peripheral being + accessed is passed as an argument. - Since many CPU architectures ultimately access these peripherals via an - internal virtual memory mapping, the portable ordering guarantees provided - by inX() and outX() are the same as those provided by readX() and writeX() - respectively when accessing a mapping with the default I/O attributes. + Since many CPU architectures ultimately access these peripherals via an + internal virtual memory mapping, the portable ordering guarantees + provided by inX() and outX() are the same as those provided by readX() + and writeX() respectively when accessing a mapping with the default I/O + attributes. - Device drivers may expect outX() to emit a non-posted write transaction - that waits for a completion response from the I/O peripheral before - returning. This is not guaranteed by all architectures and is therefore - not part of the portable ordering semantics. + Device drivers may expect outX() to emit a non-posted write transaction + that waits for a completion response from the I/O peripheral before + returning. This is not guaranteed by all architectures and is therefore + not part of the portable ordering semantics. (*) insX(), outsX(): - As above, the insX() and outsX() accessors provide the same ordering - guarantees as readsX() and writesX() respectively when accessing a mapping - with the default I/O attributes. + As above, the insX() and outsX() accessors provide the same ordering + guarantees as readsX() and writesX() respectively when accessing a + mapping with the default I/O attributes. - (*) ioreadX(), iowriteX() + (*) ioreadX(), iowriteX(): - These will perform appropriately for the type of access they're actually - doing, be it inX()/outX() or readX()/writeX(). 
+ These will perform appropriately for the type of access they're actually + doing, be it inX()/outX() or readX()/writeX(). All of these accessors assume that the underlying peripheral is little-endian, and will therefore perform byte-swapping operations on big-endian architectures. -- GitLab From 7aa0055e06475f7b486c0673bc59c6478bc055fa Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Apr 2019 15:48:43 +0200 Subject: [PATCH 0969/1861] s390: fine-tune stack switch helper The CALL_ON_STACK helper currently does not work with clang and for calls without arguments. It does not initialize r2 although the constraint is "+&d". Rework the CALL_FMT_x and the CALL_ON_STACK macros to work with clang and produce optimal code in all cases. Reported-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 81038ab357ce9..0ee022247580a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -261,12 +261,12 @@ static __no_kasan_or_inline unsigned short stap(void) CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5) -#define CALL_FMT_0 -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) -#define CALL_FMT_2 CALL_FMT_1, "d" (r3) -#define CALL_FMT_3 CALL_FMT_2, "d" (r4) -#define CALL_FMT_4 CALL_FMT_3, "d" (r5) -#define CALL_FMT_5 CALL_FMT_4, "d" (r6) +#define CALL_FMT_0 "=&d" (r2) : +#define CALL_FMT_1 "+&d" (r2) : +#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +#define CALL_FMT_5 CALL_FMT_4 "d" (r6), #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -286,10 +286,10 @@ static __no_kasan_or_inline unsigned short stap(void) " stg %[_prev],%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ - : "+&d" (r2), [_prev] "=&a" (prev) \ - : [_stack] "a" (stack), \ + : [_prev] "=&a" (prev), CALL_FMT_##nr \ + [_stack] "a" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ - [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ r2; \ }) -- GitLab From c1afcaec2af6d80d057b184eb86cbb018fce6517 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:14 +0200 Subject: [PATCH 0970/1861] s390: remove -fno-strength-reduce flag This was added as a workaround for really old compilers, and it prevents building with clang now. I can see no reason for keeping it, as it has already been removed for most architectures in the pre-git era, so let's remove it everywhere, rather than only for clang. 
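For context on the flag being removed: strength reduction is the loop optimization that replaces a costly operation with a cheaper one maintained incrementally, and -fno-strength-reduce disabled it to dodge bugs in ancient compilers. A minimal, hypothetical C illustration of the transformation (not code from this patch):

        static void fill(long *a, long n)
        {
                long i;

                /* naive form: one multiply per iteration */
                for (i = 0; i < n; i++)
                        a[i] = i * 8;
        }

        static void fill_reduced(long *a, long n)
        {
                long i, k;

                /* what strength reduction derives: a running addition */
                for (i = 0, k = 0; i < n; i++, k += 8)
                        a[i] = k;
        }

Both forms are routine for current gcc and clang, which is why dropping the flag is safe.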
Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e21053e5e0da2..9c079a506325b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -111,7 +111,7 @@ endif cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1) KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) -KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare +KBUILD_CFLAGS += -pipe -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) export KBUILD_AFLAGS_DECOMPRESSOR -- GitLab From 96ca7674ea66e9f0e2cb9f75588d4d16e5abb724 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:15 +0200 Subject: [PATCH 0971/1861] s390: don't build vdso32 with clang clang does not support 31 bit object files on s390, so skip the 32-bit vdso here, and only build it when using gcc to compile the kernel. Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 +++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/vdso.c | 10 +++++----- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4ac8d49e206a8..566f0b460d219 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -388,6 +388,9 @@ config COMPAT (and some other stuff like libraries and such) is needed for executing 31 bit applications. It is safe to say "Y". +config COMPAT_VDSO + def_bool COMPAT && !CC_IS_CLANG + config SYSVIPC_COMPAT def_bool y if COMPAT && SYSVIPC diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 8a62c7f72e1bd..1222db6d4ee90 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -86,7 +86,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso obj-y += vdso64/ -obj-$(CONFIG_COMPAT) += vdso32/ +obj-$(CONFIG_COMPAT_VDSO) += vdso32/ chkbss := head64.o early_nobss.o include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index e7920a68a12ec..243d8b1185bfc 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -29,7 +29,7 @@ #include #include -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_VDSO extern char vdso32_start, vdso32_end; static void *vdso32_kbase = &vdso32_start; static unsigned int vdso32_pages; @@ -55,7 +55,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_VDSO if (vma->vm_mm->context.compat_mm) { vdso_pagelist = vdso32_pagelist; vdso_pages = vdso32_pages; @@ -76,7 +76,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, unsigned long vdso_pages; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_VDSO if (vma->vm_mm->context.compat_mm) vdso_pages = vdso32_pages; #endif @@ -223,7 +223,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return 0; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_VDSO mm->context.compat_mm = is_compat_task(); if (mm->context.compat_mm) vdso_pages = vdso32_pages; @@ -280,7 +280,7 @@ static int __init vdso_init(void) int i; vdso_init_data(vdso_data); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_VDSO /* Calculate the size of the 32 bit vDSO */ vdso32_pages = ((&vdso32_end - &vdso32_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; -- GitLab From 
efb150df1de6462003890cbfea5fa02a28821530 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:21 +0200 Subject: [PATCH 0972/1861] s390: syscall_wrapper: avoid clang warning Building system calls with clang results in a warning about an alias from a global function to a static one: ../fs/namei.c:3847:1: warning: unused function '__se_sys_mkdirat' [-Wunused-function] SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) ^ ../include/linux/syscalls.h:219:36: note: expanded from macro 'SYSCALL_DEFINE3' #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) ^ ../include/linux/syscalls.h:228:2: note: expanded from macro 'SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ^ ../arch/s390/include/asm/syscall_wrapper.h:126:18: note: expanded from macro '__SYSCALL_DEFINEx' asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ ^ :31:1: note: expanded from here __se_sys_mkdirat ^ The only reference to the static __se_sys_mkdirat() here is the alias, but this only gets evaluated later. Making this function global as well avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall_wrapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 5596c5c625d26..3c3d6fe8e2f02 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -119,8 +119,8 @@ "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ - ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ - static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ + long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ __S390_SYS_STUBx(x, name, __VA_ARGS__) \ asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ -- GitLab From 0a113efc3b48e9a8952e3c6e19dfdcc48b62226f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:22 +0200 Subject: [PATCH 0973/1861] s390: make __load_psw_mask work with clang clang fails to use the %O and %R inline assembly modifiers the same way as gcc, leading to build failures with every use of __load_psw_mask(): /tmp/nmi-4a9f80.s: Assembler messages: /tmp/nmi-4a9f80.s:571: Error: junk at end of line: `+8(160(%r11))' /tmp/nmi-4a9f80.s:626: Error: junk at end of line: `+8(160(%r11))' Replace these with a more conventional way of passing the addresses that should work with both clang and gcc. 
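For readers who do not write s390 inline assembly: given a memory operand %1, the gcc modifier "%O1" prints only its displacement and "%R1" only its base register, so a template like "stg %0,%O1+8(%R1)" hand-builds the address of the second doubleword of operand 1. The conventional alternative is to pass the exact doubleword as its own operand and let the compiler print the complete address. A hedged sketch with a hypothetical two-doubleword structure:

        struct pair {
                unsigned long lo;
                unsigned long hi;
        };

        static inline void set_hi(struct pair *p, unsigned long val)
        {
                /* the compiler emits the full D(B) address for %0 */
                asm volatile("stg %1,%0" : "=Q" (p->hi) : "d" (val));
        }

Both gcc and clang handle the "=Q" form, which is the style the patch switches __load_psw_mask() to.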
Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0ee022247580a..8aa85e39f50b0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -339,10 +339,10 @@ static __no_kasan_or_inline void __load_psw_mask(unsigned long mask) asm volatile( " larl %0,1f\n" - " stg %0,%O1+8(%R1)\n" - " lpswe %1\n" + " stg %0,%1\n" + " lpswe %2\n" "1:" - : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); + : "=&d" (addr), "=Q" (psw.addr) : "Q" (psw) : "memory", "cc"); } /* -- GitLab From 9a0ceb9cfbee7da4a005135c620603ea837878a9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:24 +0200 Subject: [PATCH 0974/1861] s390: make chkbss work with clang llvm skips an empty .bss section entirely, which makes the check fail with an unexpected error: /tmp/binutils-multi-test/bin/s390x-linux-gnu-objdump: section '.bss' mentioned in a -j option, but not found in any input file error: arch/s390/boot/compressed/decompressor.o .bss section is not empty ../arch/s390/scripts/Makefile.chkbss:20: recipe for target 'arch/s390/boot/compressed/decompressor.o.chkbss' failed Change the check so we first see if a .bss section exists before trying to read its size. Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/scripts/Makefile.chkbss | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index cd7e8f4419f51..884a9caff5fb8 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -11,7 +11,8 @@ chkbss: $(addprefix $(obj)/, $(chkbss-files)) quiet_cmd_chkbss = CHKBSS $< cmd_chkbss = \ - if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ + if $(OBJDUMP) -h $< | grep -q "\.bss" && \ + ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ echo "error: $< .bss section is not empty" >&2; exit 1; \ fi; \ touch $@; -- GitLab From 475c8e9e89db2fcecf837ac41dcb6aed4ba72c61 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Apr 2019 09:33:12 -0700 Subject: [PATCH 0975/1861] s390: Convert IS_ENABLED uses to __is_defined IS_ENABLED should be reserved for CONFIG_ uses so convert the uses of IS_ENABLED with a #define to __is_defined. Signed-off-by: Joe Perches Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nospec-branch.c | 6 +++--- arch/s390/kernel/nospec-sysfs.c | 2 +- arch/s390/net/bpf_jit_comp.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index bdddaae965598..8e484130b9b8d 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -37,7 +37,7 @@ static int __init nospec_report(void) { if (test_facility(156)) pr_info("Spectre V2 mitigation: etokens\n"); - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) pr_info("Spectre V2 mitigation: limited branch prediction\n"); @@ -63,10 +63,10 @@ void __init nospec_auto_detect(void) * The machine supports etokens. * Disable expolines and disable nobp. 
*/ - if (IS_ENABLED(CC_USING_EXPOLINE)) + if (__is_defined(CC_USING_EXPOLINE)) nospec_disable = 1; __clear_facility(82, S390_lowcore.alt_stfle_fac_list); - } else if (IS_ENABLED(CC_USING_EXPOLINE)) { + } else if (__is_defined(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index e30e580ae3620..48f472bf92908 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -15,7 +15,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { if (test_facility(156)) return sprintf(buf, "Mitigation: etokens\n"); - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) return sprintf(buf, "Mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) return sprintf(buf, "Mitigation: limited branch prediction\n"); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 51dd0267d0148..5e7c630331590 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -455,7 +455,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth); - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) { jit->r14_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r14 thunk */ if (test_facility(35)) { @@ -473,7 +473,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) /* br %r14 */ _EMIT2(0x07fe); - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable && + if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable && (jit->seen & SEEN_FUNC)) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ @@ -999,7 +999,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* lg %w1,(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, EMIT_CONST_U64(func)); - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) { /* brasl %r14,__s390_indirect_jump_r1 */ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); } else { -- GitLab From 6e042492a272bdc507d37982b890dc5266d5bb01 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:23 +0200 Subject: [PATCH 0976/1861] s390: avoid __builtin_return_address(n) on clang llvm on s390 has problems with __builtin_return_address(n) for n>0; this results in a somewhat cryptic error message: fatal error: error in backend: Unsupported stack frame traversal count To work around it, use the direct return address instead. This is probably not ideal here, but gets things to compile and should only lead to inferior reporting, not to misbehavior of the generated code.
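A minimal sketch of the distinction, using a hypothetical function rather than kernel code:

        static void *who_called_me(void)
        {
                /* level 0 is the saved return address: fine everywhere */
                return __builtin_return_address(0);
                /*
                 * __builtin_return_address(1) would have to walk the
                 * caller's stack frame - the "stack frame traversal"
                 * that clang cannot generate on s390.
                 */
        }

With the workaround below, ftrace_return_address(n) simply reports 0 on clang builds, trading reporting accuracy for a successful compile.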
Link: https://bugs.llvm.org/show_bug.cgi?id=41424 Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 5a3c95b119527..6e0ed03387854 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -13,7 +13,12 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_CC_IS_CLANG +/* https://bugs.llvm.org/show_bug.cgi?id=41424 */ +#define ftrace_return_address(n) 0UL +#else #define ftrace_return_address(n) __builtin_return_address(n) +#endif void _mcount(void); void ftrace_caller(void); -- GitLab From 913140e221567b3ecd21b4242257a7e3fa279026 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Apr 2019 23:26:18 +0200 Subject: [PATCH 0977/1861] s390: zcrypt: initialize variables before use The 'func_code' variable gets printed in debug statements without a prior initialization in multiple functions, as reported when building with clang: drivers/s390/crypto/zcrypt_api.c:659:6: warning: variable 'func_code' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (mex->outputdatalength < mex->inputdatalength) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/s390/crypto/zcrypt_api.c:725:29: note: uninitialized use occurs here trace_s390_zcrypt_rep(mex, func_code, rc, ^~~~~~~~~ drivers/s390/crypto/zcrypt_api.c:659:2: note: remove the 'if' if its condition is always false if (mex->outputdatalength < mex->inputdatalength) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/s390/crypto/zcrypt_api.c:654:24: note: initialize the variable 'func_code' to silence this warning unsigned int func_code; ^ Add initializations to all affected code paths to shut up the warning and make the warning output consistent. Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 689c2af7026a3..c31b2d31cd832 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -659,6 +659,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO); if (mex->outputdatalength < mex->inputdatalength) { + func_code = 0; rc = -EINVAL; goto out; } @@ -742,6 +743,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, trace_s390_zcrypt_req(crt, TP_ICARSACRT); if (crt->outputdatalength < crt->inputdatalength) { + func_code = 0; rc = -EINVAL; goto out; } @@ -951,6 +953,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms, targets = kcalloc(target_num, sizeof(*targets), GFP_KERNEL); if (!targets) { + func_code = 0; rc = -ENOMEM; goto out; } @@ -958,6 +961,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms, uptr = (struct ep11_target_dev __force __user *) xcrb->targets; if (copy_from_user(targets, uptr, target_num * sizeof(*targets))) { + func_code = 0; rc = -EFAULT; goto out_free; } -- GitLab From 102bbe34b31c9159e714432afd64458f6f3876d7 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 10 Apr 2019 15:47:54 +0800 Subject: [PATCH 0978/1861] gpio: eic: sprd: Fix incorrect irq type setting for the sync EIC When setting the sync EIC to the IRQ_TYPE_EDGE_BOTH type, we failed to set the SPRD_EIC_SYNC_INTMODE register to 0, which selects edge detection. This patch fixes the issue.
Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Cc: Signed-off-by: Baolin Wang Signed-off-by: Linus Walleij --- drivers/gpio/gpio-eic-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index f0223cee97744..77092268ee955 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); irq_set_handler_locked(data, handle_edge_irq); break; -- GitLab From a66477b0efe511d98dde3e4aaeb189790e6f0a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 2 Apr 2019 09:26:52 +0200 Subject: [PATCH 0979/1861] drm/ttm: fix out-of-bounds read in ttm_put_pages() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ttm_put_pages() tries to figure out whether it's dealing with transparent hugepages, it just reads past the bounds of the pages array without a check. v2: simplify the test if enough pages are left in the array (Christian). Signed-off-by: Jann Horn Signed-off-by: Christian König Fixes: 5c42c64f7d54 ("drm/ttm: fix the fix for huge compound pages") Cc: stable@vger.kernel.org Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f841accc2c006..f77c81db161b2 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -730,7 +730,8 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (!(flags & TTM_PAGE_FLAG_DMA32)) { + if (!(flags & TTM_PAGE_FLAG_DMA32) && + (npages - i) >= HPAGE_PMD_NR) { for (j = 0; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; @@ -759,7 +760,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, unsigned max_size, n2free; spin_lock_irqsave(&huge->lock, irq_flags); - while (i < npages) { + while ((npages - i) >= HPAGE_PMD_NR) { struct page *p = pages[i]; unsigned j; -- GitLab From ac1e516d5a4c56bf0cb4a3dfc0672f689131cfd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 2 Apr 2019 09:29:35 +0200 Subject: [PATCH 0980/1861] drm/ttm: fix start page for huge page check in ttm_put_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first page entry always compares equal to itself, so the huge-page check can start at the second entry.
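The check in question asks whether HPAGE_PMD_NR consecutive array slots refer to physically consecutive pages, and slot 0 trivially matches itself. A standalone sketch of the idea - a hypothetical helper, not the driver code, assuming the struct page array for the range is virtually contiguous:

        static bool pages_form_huge_page(struct page **pages, unsigned long nr)
        {
                unsigned long j;

                /* slot 0 is the base; verify the rest follow it in order */
                for (j = 1; j < nr; j++)
                        if (pages[j] != pages[0] + j)
                                return false;
                return true;
        }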
Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f77c81db161b2..c74147f0cbe3d 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -732,7 +732,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (!(flags & TTM_PAGE_FLAG_DMA32) && (npages - i) >= HPAGE_PMD_NR) { - for (j = 0; j < HPAGE_PMD_NR; ++j) + for (j = 1; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; @@ -767,7 +767,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, if (!p) break; - for (j = 0; j < HPAGE_PMD_NR; ++j) + for (j = 1; j < HPAGE_PMD_NR; ++j) if (p++ != pages[i + j]) break; -- GitLab From 453393369dc9806d2455151e329c599684762428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 10 Apr 2019 11:43:43 +0200 Subject: [PATCH 0981/1861] drm/ttm: fix incrementing the page pointer for huge pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we increment the counter we need to increment the pointer as well. Signed-off-by: Christian König Fixes: e16858a7e6e7 drm/ttm: fix start page for huge page check in ttm_put_pages() Reviewed-by: Michel Dänzer Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index c74147f0cbe3d..627f8dc91d0ed 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -733,7 +733,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, if (!(flags & TTM_PAGE_FLAG_DMA32) && (npages - i) >= HPAGE_PMD_NR) { for (j = 1; j < HPAGE_PMD_NR; ++j) - if (p++ != pages[i + j]) + if (++p != pages[i + j]) break; if (j == HPAGE_PMD_NR) @@ -768,7 +768,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, break; for (j = 1; j < HPAGE_PMD_NR; ++j) - if (p++ != pages[i + j]) + if (++p != pages[i + j]) break; if (j != HPAGE_PMD_NR) -- GitLab From 543c364d8eeeb42c0edfaac9764f4e9f3d777ec1 Mon Sep 17 00:00:00 2001 From: Lin Yi Date: Wed, 10 Apr 2019 10:23:34 +0800 Subject: [PATCH 0982/1861] drm/ttm: fix dma_fence refcount imbalance on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttm_bo_add_move_fence() takes a reference to the struct dma_fence, but failed to release it on the error path, leading to a memory leak. Add dma_fence_put() before returning when an error occurs.
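The bug is an instance of the standard refcounting rule: once a function takes its own reference, every exit path, including early error returns, must drop it. A generic sketch with hypothetical helpers do_setup() and stash_fence():

        static int use_fence(struct dma_fence *fence)
        {
                int ret;

                dma_fence_get(fence);           /* take our reference */
                ret = do_setup();
                if (ret) {
                        dma_fence_put(fence);   /* balance it on failure too */
                        return ret;
                }
                stash_fence(fence);             /* success: the reference moves on */
                return 0;
        }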
Signed-off-by: Lin Yi Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3f56647cdb35f..0fa5034b9f9e0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -876,8 +876,10 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, reservation_object_add_shared_fence(bo->resv, fence); ret = reservation_object_reserve_shared(bo->resv, 1); - if (unlikely(ret)) + if (unlikely(ret)) { + dma_fence_put(fence); return ret; + } dma_fence_put(bo->moving); bo->moving = fence; -- GitLab From f4bbebf8e7eb4d294b040ab2d2ba71e70e69b930 Mon Sep 17 00:00:00 2001 From: Martin Leung Date: Tue, 26 Mar 2019 13:14:11 -0400 Subject: [PATCH 0983/1861] drm/amd/display: extending AUX SW Timeout [Why] AUX takes longer to reply when using active DP-DVI dongle on some asics resulting in up to 2000+ us edid read (timeout). [How] 1. Adjust AUX poll to match spec 2. Extend the SW timeout. This does not affect normal operation since we exit the loop as soon as AUX acks. Signed-off-by: Martin Leung Reviewed-by: Jun Lei Acked-by: Joshua Aberback Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 9 ++++++--- drivers/gpu/drm/amd/display/dc/dce/dce_aux.h | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 4febf4ef7240e..4fe3664fb4950 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -190,6 +190,12 @@ static void submit_channel_request( 1, 0); } + + REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); + + REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, + 10, aux110->timeout_period/10); + /* set the delay and the number of bytes to write */ /* The length include @@ -242,9 +248,6 @@ static void submit_channel_request( } } - REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1); - REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0, - 10, aux110->timeout_period/10); REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h index d27f22c05e4b5..e28ed6a00ff42 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h @@ -71,11 +71,11 @@ enum { /* This is the timeout as defined in DP 1.2a, * at most within ~240usec. That means, * increasing this timeout will not affect normal operation, * and we'll timeout after - * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec. + * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 2400usec. * This timeout is especially important for - * resume from S3 and CTS. + * converters, resume from S3, and CTS. */ - SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4 + SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 6 }; struct dce_aux { -- GitLab From 67f471b6ed3b09033c4ac77ea03f92afdb1989fe Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 8 Apr 2019 11:15:19 -0700 Subject: [PATCH 0984/1861] nvme-fc: correct csn initialization and increments on error This patch fixes a long-standing bug that initialized the FC-NVME cmnd iu CSN value to 1. Early FC-NVME specs had the connection starting with CSN=1. By the time the spec reached approval, the language had changed to state a connection should start with CSN=0. 
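The relocation of the assignment described below follows a general rule for sequence numbers: do not consume one until the request is definitely going out, or every late failure burns a number and leaves a gap. A hedged sketch with hypothetical types and helpers:

        static int queue_and_send(struct queue *q, struct cmd *cmd)
        {
                if (dma_map(cmd))       /* may fail: no CSN consumed yet */
                        return -EIO;
                /* number the command immediately before the handoff */
                cmd->csn = cpu_to_be32(atomic_inc_return(&q->csn));
                return hand_off_to_lldd(cmd);
        }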
This patch corrects the initialization value for FC-NVME connections. Additionally, in reviewing the transport, the CSN value is assigned to the new IU early in the start routine. It's possible that a later dma map request may fail, causing the command to never be sent to the controller. Change the location of the assignment so that it is immediately prior to calling the lldd. Add a comment block to explain the impacts if the lldd were to additionally fail sending the command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Ewan D. Milne Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f3b9d91ba0dfd..6d8451356eaca 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1845,7 +1845,7 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) memset(queue, 0, sizeof(*queue)); queue->ctrl = ctrl; queue->qnum = idx; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); queue->dev = ctrl->dev; if (idx > 0) @@ -1887,7 +1887,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) */ queue->connection_id = 0; - atomic_set(&queue->csn, 1); + atomic_set(&queue->csn, 0); } static void @@ -2183,7 +2183,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, { struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; - u32 csn; int ret, opstate; /* @@ -2198,8 +2197,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* format the FC-NVME CMD IU and fcp_req */ cmdiu->connection_id = cpu_to_be64(queue->connection_id); - csn = atomic_inc_return(&queue->csn); - cmdiu->csn = cpu_to_be32(csn); cmdiu->data_len = cpu_to_be32(data_len); switch (io_dir) { case NVMEFC_FCP_WRITE: @@ -2257,11 +2254,24 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); + cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn)); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, &ctrl->rport->remoteport, queue->lldd_handle, &op->fcp_req); if (ret) { + /* + * If the lld fails to send the command is there an issue with + * the csn value? If the command that fails is the Connect, + * no - as the connection won't be live. If it is a command + * post-connect, it's possible a gap in csn may be created. + * Does this matter? As Linux initiators don't send fused + * commands, no. The gap would exist, but as there's nothing + * that depends on csn order to be delivered on the target + * side, it shouldn't hurt. It would be difficult for a + * target to even detect the csn gap as it has no idea when the + * cmd with the csn was supposed to arrive. + */ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); -- GitLab From d808b7f759b50acf0784ce6230ffa63e12ef465d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Apr 2019 10:03:59 -0600 Subject: [PATCH 0985/1861] nvmet: fix discover log page when offsets are used The nvme target hadn't been taking the Get Log Page offset parameter into consideration, and so has been returning corrupted log pages when offsets are used. Since many tools, including nvme-cli, split the log request to 4k, we've been breaking discovery log responses when more than 3 subsystems exist. 
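The "more than 3" boundary is plain arithmetic: the discovery log page header and each discovery entry are both 1024 bytes in the NVMe-oF layout, so one 4096-byte read carries the header plus at most three entries:

        4096 = 1024 (header) + 3 * 1024 (entries)

A fourth subsystem therefore forces a follow-up read at a non-zero offset, which is exactly the case that came back corrupted.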
Fix the returned data by internally generating the entire discovery log page and copying only the requested bytes into the user buffer. The command log page offset type has been modified to a native __le64 to make it easier to extract the value from a command. Signed-off-by: Keith Busch Tested-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 5 +++ drivers/nvme/target/discovery.c | 68 ++++++++++++++++++++++----------- drivers/nvme/target/nvmet.h | 1 + include/linux/nvme.h | 9 ++++- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 76250181fee05..9f72d515fc4b3 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -24,6 +24,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +u64 nvmet_get_log_page_offset(struct nvme_command *cmd) +{ + return le64_to_cpu(cmd->get_log_page.lpo); +} + static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) { nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index c872b47a88f31..33ed95e72d6b1 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -131,54 +131,76 @@ static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); } +static size_t discovery_log_entries(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvmet_subsys_link *p; + struct nvmet_port *r; + size_t entries = 0; + + list_for_each_entry(p, &req->port->subsystems, entry) { + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) + continue; + entries++; + } + list_for_each_entry(r, &req->port->referrals, entry) + entries++; + return entries; +} + static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) { const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry); struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmf_disc_rsp_page_hdr *hdr; + u64 offset = nvmet_get_log_page_offset(req->cmd); size_t data_len = nvmet_get_log_page_len(req->cmd); - size_t alloc_len = max(data_len, sizeof(*hdr)); - int residual_len = data_len - sizeof(*hdr); + size_t alloc_len; struct nvmet_subsys_link *p; struct nvmet_port *r; u32 numrec = 0; u16 status = 0; + void *buffer; + + /* Spec requires dword aligned offsets */ + if (offset & 0x3) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + goto out; + } /* * Make sure we're passing at least a buffer of response header size. * If host provided data len is less than the header size, only the * number of bytes requested by host will be sent to host. 
*/ - hdr = kzalloc(alloc_len, GFP_KERNEL); - if (!hdr) { + down_read(&nvmet_config_sem); + alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req); + buffer = kzalloc(alloc_len, GFP_KERNEL); + if (!buffer) { + up_read(&nvmet_config_sem); status = NVME_SC_INTERNAL; goto out; } - down_read(&nvmet_config_sem); + hdr = buffer; list_for_each_entry(p, &req->port->subsystems, entry) { + char traddr[NVMF_TRADDR_SIZE]; + if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) continue; - if (residual_len >= entry_size) { - char traddr[NVMF_TRADDR_SIZE]; - - nvmet_set_disc_traddr(req, req->port, traddr); - nvmet_format_discovery_entry(hdr, req->port, - p->subsys->subsysnqn, traddr, - NVME_NQN_NVME, numrec); - residual_len -= entry_size; - } + + nvmet_set_disc_traddr(req, req->port, traddr); + nvmet_format_discovery_entry(hdr, req->port, + p->subsys->subsysnqn, traddr, + NVME_NQN_NVME, numrec); numrec++; } list_for_each_entry(r, &req->port->referrals, entry) { - if (residual_len >= entry_size) { - nvmet_format_discovery_entry(hdr, r, - NVME_DISC_SUBSYS_NAME, - r->disc_addr.traddr, - NVME_NQN_DISC, numrec); - residual_len -= entry_size; - } + nvmet_format_discovery_entry(hdr, r, + NVME_DISC_SUBSYS_NAME, + r->disc_addr.traddr, + NVME_NQN_DISC, numrec); numrec++; } @@ -190,8 +212,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) up_read(&nvmet_config_sem); - status = nvmet_copy_to_sgl(req, 0, hdr, data_len); - kfree(hdr); + status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len); + kfree(buffer); out: nvmet_req_complete(req, status); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 51e49efd7849d..1653d19b187fd 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -428,6 +428,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len); u32 nvmet_get_log_page_len(struct nvme_command *cmd); +u64 nvmet_get_log_page_offset(struct nvme_command *cmd); extern struct list_head *nvmet_ports; void nvmet_port_disc_changed(struct nvmet_port *port, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index baa49e6a23cc7..c40720cb59acc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -967,8 +967,13 @@ struct nvme_get_log_page_command { __le16 numdl; __le16 numdu; __u16 rsvd11; - __le32 lpol; - __le32 lpou; + union { + struct { + __le32 lpol; + __le32 lpou; + }; + __le64 lpo; + }; __u32 rsvd14[2]; }; -- GitLab From 8bbad1ba3196814487438d1299cec75de5c74615 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Mar 2019 14:57:43 +0100 Subject: [PATCH 0986/1861] gpu: host1x: Program stream ID to bypass without SMMU If SMMU support is not available, fall back to programming the bypass stream ID (0x7f). 
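The shape of the fix, written out as straight-line code matching the diff below: program the safe bypass value unconditionally, and narrow it only when the IOMMU layer is compiled in and actually supplies a stream ID:

        u32 sid = 0x7f;                         /* bypass: safe default */
        #ifdef CONFIG_IOMMU_API
        if (spec)                               /* fwspec present: use its ID */
                sid = spec->ids[0] & 0xffff;
        #endif
        host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);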
Fixes: de5469c21ff9 ("gpu: host1x: Program the channel stream ID") Suggested-by: Mikko Perttunen Signed-off-by: Arnd Bergmann Reviewed-by: Mikko Perttunen [treding@nvidia.com: rebase this on top of a later build fix] Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 4030d64916f00..0c0eb43abf657 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -114,9 +114,13 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) static void host1x_channel_set_streamid(struct host1x_channel *channel) { -#if IS_ENABLED(CONFIG_IOMMU_API) && HOST1X_HW >= 6 +#if HOST1X_HW >= 6 + u32 sid = 0x7f; +#ifdef CONFIG_IOMMU_API struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); - u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f; + if (spec) + sid = spec->ids[0] & 0xffff; +#endif host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID); #endif -- GitLab From 3d659e7d6513890eb137e7056635004b8b885ce5 Mon Sep 17 00:00:00 2001 From: Raphael Gault Date: Thu, 11 Apr 2019 17:16:46 +0100 Subject: [PATCH 0987/1861] arm64: perf_event: Remove wrongfully used inline The functions armv8pmu_read_counter() and armv8pmu_write_counter() are `static inline` while they are only referenced when assigned to a function pointer field in a `struct arm_pmu` instance. The inline keyword is thus counterintuitive and shouldn't be used. Acked-by: Mark Rutland Signed-off-by: Raphael Gault Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4addb38bc250b..6164d389eed60 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -431,7 +431,7 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) return val; } -static inline u64 armv8pmu_read_counter(struct perf_event *event) +static u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -468,7 +468,7 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, } } -static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) +static void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; -- GitLab From 691efbedc60d2a7364a90e38882fc762f06f52c4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 11 Apr 2019 18:30:15 +0900 Subject: [PATCH 0988/1861] arm64: vdso: use $(LD) instead of $(CC) to link VDSO We use $(LD) to link vmlinux, modules, decompressors, etc. VDSO is the only exceptional case where $(CC) is used as the linker driver, but I do not know why we need to do so. VDSO uses a special linker script, and does not link standard libraries at all. I changed the Makefile to use $(LD) rather than $(CC). I tested this, and VDSO worked for me. Users will be able to use their favorite linker (e.g. lld instead of bfd) by passing LD= from the command line. My plan is to rewrite all VDSO Makefiles to use $(LD), then delete cc-ldoption.
Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index a0af6bf6c11bf..744b9dbaba036 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -12,17 +12,12 @@ obj-vdso := gettimeofday.o note.o sigreturn.o targets := $(obj-vdso) vdso.so vdso.so.dbg obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 \ + $(call ld-option, --hash-style=sysv) -n -T # Disable gcov profiling for VDSO code GCOV_PROFILE := n -# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared -# down to collect2, resulting in silent corruption of the vDSO image. -ccflags-y += -Wl,-shared - obj-y += vdso.o extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) @@ -32,7 +27,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) + $(call if_changed,ld) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -52,8 +47,6 @@ $(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< -- GitLab From b995dcca7cf12f208cfd95fd9d5768dca7cccec7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Apr 2019 10:22:43 -0700 Subject: [PATCH 0989/1861] platform/x86: pmc_atom: Drop __initconst on dmi table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's used by probe and that isn't an init function. Drop this so that we don't get a section mismatch. Reported-by: kbuild test robot Cc: David Müller Cc: Hans de Goede Cc: Andy Shevchenko Fixes: 7c2e07130090 ("clk: x86: Add system specific quirk to mark clocks as critical") Signed-off-by: Stephen Boyd --- drivers/platform/x86/pmc_atom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index eaec2d306481c..c7039f52ad518 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -396,7 +396,7 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc) * Some systems need one or more of their pmc_plt_clks to be * marked as critical. */ -static const struct dmi_system_id critclk_systems[] __initconst = { +static const struct dmi_system_id critclk_systems[] = { { .ident = "MPL CEC1x", .matches = { -- GitLab From 8c5165430c0194df92369162d1c7f53f8672baa5 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 10 Apr 2019 16:59:25 -0500 Subject: [PATCH 0990/1861] dma-debug: only skip one stackframe entry With skip set to 1, I get a traceback like this: [ 106.867637] DMA-API: Mapped at: [ 106.870784] afu_dma_map_region+0x2cd/0x4f0 [dfl_afu] [ 106.875839] afu_ioctl+0x258/0x380 [dfl_afu] [ 106.880108] do_vfs_ioctl+0xa9/0x720 [ 106.883688] ksys_ioctl+0x60/0x90 [ 106.887007] __x64_sys_ioctl+0x16/0x20 With the previous value of 2, afu_dma_map_region was being omitted. 
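For reference, the skip field tells the saver how many innermost frames to discard before recording anything. With the old struct stack_trace API used by this file, the setup looks roughly like this (buffer name hypothetical):

        unsigned long entries[8];
        struct stack_trace trace = {
                .entries        = entries,
                .max_entries    = ARRAY_SIZE(entries),
                .skip           = 1,    /* drop one frame of capture overhead */
        };

        save_stack_trace(&trace);       /* entries[0]: innermost frame kept */

Each extra unit of skip hides one more frame from the top of the trace, which is how afu_dma_map_region went missing with skip set to 2.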
I suspect that the code paths have simply changed since the value of 2 was chosen a decade ago, but it's also possible that it varies based on which mapping function was used, compiler inlining choices, etc. In any case, it's best to err on the side of skipping less. Signed-off-by: Scott Wood Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 45d51e8e26f62..a218e43cc3825 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -706,7 +706,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 2; + entry->stacktrace.skip = 1; save_stack_trace(&entry->stacktrace); #endif -- GitLab From fd57770dd198f5b2ddd5b9e6bf282cf98d63adb9 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 11 Apr 2019 11:17:30 +0200 Subject: [PATCH 0991/1861] net/smc: wait for pending work before clcsock release_sock When the clcsock is already released using sock_release() and a pending smc_listen_work accesses the clcsock, then that will fail. Solve this by canceling and waiting for the work to complete first. Because the work holds the sock_lock it must make sure that the lock is not held before the new helper smc_clcsock_release() is invoked. And before the smc_listen_work starts working, check if the parent listen socket is still valid, otherwise stop the work early. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 14 ++++++++------ net/smc/smc_close.c | 25 +++++++++++++++++++++---- net/smc/smc_close.h | 1 + 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 77ef53596d18c..9bdaed2f2e35c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -167,10 +167,9 @@ static int smc_release(struct socket *sock) if (sk->sk_state == SMC_CLOSED) { if (smc->clcsock) { - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); + release_sock(sk); + smc_clcsock_release(smc); + lock_sock(sk); } if (!smc->use_fallback) smc_conn_free(&smc->conn); @@ -1037,13 +1036,13 @@ static void smc_listen_out(struct smc_sock *new_smc) struct smc_sock *lsmc = new_smc->listen_smc; struct sock *newsmcsk = &new_smc->sk; - lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); + release_sock(&lsmc->sk); } else { /* no longer listening */ smc_close_non_accepted(newsmcsk); } - release_sock(&lsmc->sk); /* Wake up accept */ lsmc->sk.sk_data_ready(&lsmc->sk); @@ -1237,6 +1236,9 @@ static void smc_listen_work(struct work_struct *work) int rc = 0; u8 ibport; + if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) + return smc_listen_out_err(new_smc); + if (new_smc->use_fallback) { smc_listen_out_connected(new_smc); return; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 2ad37e9985093..fc06720b53c14 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -21,6 +21,22 @@ #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) +/* release the clcsock that is assigned to the smc_sock */ +void smc_clcsock_release(struct smc_sock *smc) +{ + struct socket *tcp; + + if (smc->listen_smc && current_work() != &smc->smc_listen_work)
cancel_work_sync(&smc->smc_listen_work); + mutex_lock(&smc->clcsock_release_lock); + if (smc->clcsock) { + tcp = smc->clcsock; + smc->clcsock = NULL; + sock_release(tcp); + } + mutex_unlock(&smc->clcsock_release_lock); +} + static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -321,6 +337,7 @@ static void smc_close_passive_work(struct work_struct *work) close_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); struct smc_cdc_conn_state_flags *rxflags; + bool release_clcsock = false; struct sock *sk = &smc->sk; int old_state; @@ -400,13 +417,13 @@ wakeup: if ((sk->sk_state == SMC_CLOSED) && (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); - if (smc->clcsock) { - sock_release(smc->clcsock); - smc->clcsock = NULL; - } + if (smc->clcsock) + release_clcsock = true; } } release_sock(sk); + if (release_clcsock) + smc_clcsock_release(smc); sock_put(sk); /* sock_hold done by schedulers of close_work */ } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 19eb6a211c23c..e0e3b5df25d24 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); +void smc_clcsock_release(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ -- GitLab From e183d4e414b64711baf7a04e214b61969ca08dfa Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 11 Apr 2019 11:17:31 +0200 Subject: [PATCH 0992/1861] net/smc: fix a NULL pointer dereference In case alloc_ordered_workqueue fails, the fix returns NULL to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ism.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 2fff79db1a59c..e89e918b88e09 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -289,6 +289,11 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, INIT_LIST_HEAD(&smcd->vlan); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); + if (!smcd->event_wq) { + kfree(smcd->conn); + kfree(smcd); + return NULL; + } return smcd; } EXPORT_SYMBOL_GPL(smcd_alloc_dev); -- GitLab From 07603b230895a74ebb1e2a1231ac45c29c2a8cd3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 11 Apr 2019 11:17:32 +0200 Subject: [PATCH 0993/1861] net/smc: propagate file from SMC to TCP socket fcntl(fd, F_SETOWN, getpid()) selects the recipient of SIGURG signals that are delivered when out-of-band data arrives on socket fd. If an SMC socket program makes use of such an fcntl() call, it fails in case of fallback to TCP-mode. In case of fallback the traffic is processed with the internal TCP socket. Propagating field "file" from the SMC socket to the internal TCP socket fixes the issue. Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/sock.h | 6 ------ net/smc/af_smc.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8de5ee258b93a..341f8bafa0cf5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2084,12 +2084,6 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) * @p: poll_table * * See the comments in the wq_has_sleeper function. - * - * Do not derive sock from filp->private_data here. 
An SMC socket establishes - * an internal TCP socket that is used in the fallback case. All socket - * operations on the SMC socket are then forwarded to the TCP socket. In case of - * poll, the filp->private_data pointer references the SMC socket because the - * TCP socket has no file assigned. */ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9bdaed2f2e35c..d2a0d15f809ce 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -445,10 +445,19 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_switch_to_fallback(struct smc_sock *smc) +{ + smc->use_fallback = true; + if (smc->sk.sk_socket && smc->sk.sk_socket->file) { + smc->clcsock->file = smc->sk.sk_socket->file; + smc->clcsock->file->private_data = smc->clcsock; + } +} + /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = reason_code; smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -774,10 +783,14 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = -rc; out: - if (smc->sk.sk_err) - smc->sk.sk_state_change(&smc->sk); - else - smc->sk.sk_write_space(&smc->sk); + if (!sock_flag(&smc->sk, SOCK_DEAD)) { + if (smc->sk.sk_err) { + smc->sk.sk_state_change(&smc->sk); + } else { /* allow polling before and after fallback decision */ + smc->clcsock->sk->sk_write_space(smc->clcsock->sk); + smc->sk.sk_write_space(&smc->sk); + } + } kfree(smc->connect_info); smc->connect_info = NULL; release_sock(&smc->sk); @@ -934,8 +947,13 @@ struct sock *smc_accept_dequeue(struct sock *parent, sock_put(new_sk); /* final */ continue; } - if (new_sock) + if (new_sock) { sock_graft(new_sk, new_sock); + if (isk->use_fallback) { + smc_sk(new_sk)->clcsock->file = new_sock->file; + isk->clcsock->file->private_data = isk->clcsock; + } + } return new_sk; } return NULL; @@ -1086,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, return; } smc_conn_free(&new_smc->conn); - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code) < 0) { @@ -1246,7 +1264,7 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - new_smc->use_fallback = true; + smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; smc_listen_out_connected(new_smc); return; @@ -1503,7 +1521,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { rc = -EINVAL; @@ -1705,7 +1723,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT) { - smc->use_fallback = true; + smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { if (!smc->use_fallback) -- GitLab From 8ef659f1a840c953a59442ff1400ec73baf3b601 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 11 Apr 2019 11:17:33 +0200 Subject: [PATCH 0994/1861] net/smc: fix return code from FLUSH command The FLUSH 
command is used to empty the pnet table. No return code is expected from the command. Commit a9d8b0b1e3d6 added namespace support for the pnet table and changed the FLUSH command processing to call smc_pnet_remove_by_pnetid() to remove the pnet entries. This function returns -ENOENT when no entry was deleted, which is now the return code of the FLUSH command. As a result the FLUSH command will return an error when the pnet table is already empty. Restore the expected behavior and let FLUSH always return 0. Fixes: a9d8b0b1e3d6 ("net/smc: add pnet table namespace support") Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 8d2f6296279c9..0285c7f9e79b6 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -603,7 +603,8 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - return smc_pnet_remove_by_pnetid(net, NULL); + smc_pnet_remove_by_pnetid(net, NULL); + return 0; } /* SMC_PNETID generic netlink operation definition */ -- GitLab From f61bca58f6c36e666c2b807697f25e5e98708162 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 11 Apr 2019 11:17:34 +0200 Subject: [PATCH 0995/1861] net/smc: move unhash before release of clcsock Commit <26d92e951fe0> ("net/smc: move unhash as early as possible in smc_release()") fixes one occurrence in the smc code, but the same pattern exists in other places. This patch covers the remaining occurrences and makes sure the unhash operation is done before the smc->clcsock is released. This avoids a potential use-after-free in smc_diag_dump(). Reviewed-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d2a0d15f809ce..6f869ef49b322 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -884,11 +884,11 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (rc < 0) lsk->sk_err = -rc; if (rc < 0 || lsk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; @@ -939,11 +939,11 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + new_sk->sk_prot->unhash(new_sk); if (isk->clcsock) { sock_release(isk->clcsock); isk->clcsock = NULL; } - new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ continue; } @@ -973,6 +973,7 @@ void smc_close_non_accepted(struct sock *sk) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } + sk->sk_prot->unhash(sk); if (smc->clcsock) { struct socket *tcp; @@ -988,7 +989,6 @@ void smc_close_non_accepted(struct sock *sk) smc_conn_free(&smc->conn); } release_sock(sk); - sk->sk_prot->unhash(sk); sock_put(sk); /* final sock_put */ } -- GitLab From 5ee15c101f29e0093ffb5448773ccbc786eb313b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:32 +0200 Subject: [PATCH 0996/1861] net: thunderx: raise XDP MTU to 1508 The thunderx driver splits frames bigger than 1530 bytes across multiple pages, making it impossible to run an eBPF program on such frames. This leads to a maximum MTU of 1508 if QinQ is in use.
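The 1508 figure is the page-split threshold minus worst-case L2 overhead, and matches the MAX_XDP_MTU definition in the diff below:

        1530 - 14 (Ethernet header) - 2 * 4 (two VLAN tags, QinQ) = 1508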
The thunderx driver forbids loading an eBPF program if the MTU is higher than 1500 bytes. Raise the limit to 1508 so it is possible to use L2 protocols which need some more headroom. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 28eac90562113..debc8c861c6bd 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -32,6 +32,13 @@ #define DRV_NAME "nicvf" #define DRV_VERSION "1.0" +/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs + * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed + * this value, keeping headroom for the 14 byte Ethernet header and two + * VLAN tags (for QinQ) + */ +#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) + /* Supported devices */ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, @@ -1830,8 +1837,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool bpf_attached = false; int ret = 0; - /* For now just support only the usual MTU sized frames */ - if (prog && (dev->mtu > 1500)) { + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (prog && dev->mtu > MAX_XDP_MTU) { netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", dev->mtu); return -EOPNOTSUPP; -- GitLab From 1f227d16083b2e280b7dde4ca78883d75593f2fd Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 11 Apr 2019 12:26:33 +0200 Subject: [PATCH 0997/1861] net: thunderx: don't allow jumbo frames with XDP The thunderx driver forbids loading an eBPF program if the MTU is too high, but this can be circumvented by loading the eBPF, then raising the MTU. Fix this by limiting the MTU if an eBPF program is already loaded. Fixes: 05c773f52b96e ("net: thunderx: Add basic XDP support") Signed-off-by: Matteo Croce Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index debc8c861c6bd..c032bef1b776d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1589,6 +1589,15 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) struct nicvf *nic = netdev_priv(netdev); int orig_mtu = netdev->mtu; + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ.
+ */ + if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { + netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + netdev->mtu); + return -EINVAL; + } + netdev->mtu = new_mtu; if (!netif_running(netdev)) -- GitLab From c5b493ce192bd7a4e7bd073b5685aad121eeef82 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 15:08:25 +0300 Subject: [PATCH 0998/1861] net: bridge: multicast: use rcu to access port list from br_multicast_start_querier br_multicast_start_querier() walks over the port list but it can be called from a timer with only multicast_lock held, which doesn't protect the port list, so use RCU to walk over it. Fixes: c83b8fab06fc ("bridge: Restart queries when last querier expires") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 02da21d771c9c..45e7f4173bbaf 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2031,7 +2031,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -2043,6 +2044,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) -- GitLab From 7c2bd9a39845bfb6d72ddb55ce737650271f6f96 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 30 Mar 2019 10:21:07 +0900 Subject: [PATCH 0999/1861] NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family. syzbot is reporting an uninitialized value at rpc_sockaddr2uaddr() [1]. This is because syzbot is setting AF_INET6 to "struct sockaddr_in"->sin_family (which is embedded into the user-visible "struct nfs_mount_data" structure) even though nfs23_validate_mount_data() cannot pass sizeof(struct sockaddr_in6) bytes of an AF_INET6 address to rpc_sockaddr2uaddr(). Since the "struct nfs_mount_data" structure is user-visible, we can't change "struct nfs_mount_data" to use "struct sockaddr_storage". Therefore, assuming that everybody is using the AF_INET family when passing an address via "struct nfs_mount_data"->addr, reject the mount if its sin_family is not AF_INET. [1] https://syzkaller.appspot.com/bug?id=599993614e7cbbf66bc2656a919ab2a95fb5d75c Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 23790c7b2289d..c27ac96a95bd3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2041,7 +2041,8 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); args->nfs_server.port = ntohs(data->addr.sin_port); - if (!nfs_verify_server_address(sap)) + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) -- GitLab From 29e7ca715f2a0b6c0a99b1aec1b0956d1f271955 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Apr 2019 10:44:16 -0400 Subject: [PATCH 1000/1861] NFS: Fix handling of reply page vector NFSv4 GETACL and FS_LOCATIONS requests stopped working in v5.1-rc.
These two need the extra padding to be added directly to the reply length. Reported-by: Olga Kornievskaia Fixes: 02ef04e432ba ("NFS: Account for XDR pad of buf->pages") Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cfcabc33e24d0..602446158bfb5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2589,7 +2589,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); rpc_prepare_reply_pages(req, args->acl_pages, 0, - args->acl_len, replen); + args->acl_len, replen + 1); encode_nops(&hdr); } @@ -2811,7 +2811,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, } rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, - PAGE_SIZE, replen); + PAGE_SIZE, replen + 1); encode_nops(&hdr); } -- GitLab From e1ede312f17e96a9c5cda9aaa1cdcf442c1a5da8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Apr 2019 17:04:09 -0400 Subject: [PATCH 1001/1861] xprtrdma: Fix helper that drains the transport We want to drain only the RQ first. Otherwise the transport can deadlock on ->close if there are outstanding Send completions. Fixes: 6d2d0ee27c7a ("xprtrdma: Replace rpcrdma_receive_wq ... ") Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 89a63391d4d44..30cfc0efe6990 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -90,7 +90,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) /* Flush Receives, then wait for deferred Reply work * to complete. */ - ib_drain_qp(ia->ri_id->qp); + ib_drain_rq(ia->ri_id->qp); drain_workqueue(buf->rb_completion_wq); /* Deferred Reply processing might have scheduled -- GitLab From 0769663b4f580566ef6cdf366f3073dbe8022c39 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 11 Apr 2019 14:34:18 -0400 Subject: [PATCH 1002/1861] NFSv4.1 fix incorrect return value in copy_file_range According to the NFSv4.2 spec, if the input and output file are the same file, the operation should fail with EINVAL. However, the Linux copy_file_range() system call has no such restriction. Therefore, in such a case let's return EOPNOTSUPP and allow the VFS to fall back to doing do_splice_direct(). Also, when copy_file_range is called on an NFSv4.0 or 4.1 mount (i.e., a server that doesn't support COPY functionality), we also need to return EOPNOTSUPP and fall back to a regular copy. Fixes xfstest generic/075, generic/091, generic/112, generic/263 for all NFSv4.x versions.
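Returning EOPNOTSUPP (rather than EINVAL) works because of the generic VFS dispatch, which retries the copy through the page cache when the filesystem opts out; roughly (a simplified sketch of the fs/read_write.c fallback path, not the verbatim kernel code):

	ssize_t ret;

	ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
					      pos_out, count, flags);
	if (ret == -EOPNOTSUPP)
		/* the filesystem declined: do a regular splice-based copy */
		ret = do_splice_direct(file_in, &pos_in, file_out,
				       &pos_out, count, 0);

so an EOPNOTSUPP from NFS hands the request back to the VFS instead of failing it.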
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 --- fs/nfs/nfs4file.c | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ff6f85fb676b7..5196bfa7894d2 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -329,9 +329,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, }; ssize_t err, err2; - if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) - return -EOPNOTSUPP; - src_lock = nfs_get_lock_context(nfs_file_open_context(src)); if (IS_ERR(src_lock)) return PTR_ERR(src_lock); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 45b2322e092d2..00d17198ee12a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,8 +133,10 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) - return -EINVAL; + return -EOPNOTSUPP; return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } -- GitLab From af6b61d7ef58099c82d854395a0e002be6bd036c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Apr 2019 15:16:52 -0400 Subject: [PATCH 1003/1861] Revert "SUNRPC: Micro-optimise when the task is known not to be sleeping" This reverts commit 009a82f6437490c262584d65a14094a818bcb747. The ability to optimise here relies on compiler being able to optimise away tail calls to avoid stack overflows. Unfortunately, we are seeing reports of problems, so let's just revert. Reported-by: Daniel Mack Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 8 ------- net/sunrpc/clnt.c | 45 +++++++----------------------------- 2 files changed, 8 insertions(+), 45 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ec861cd0cfe8c..52d41d0c1ae1d 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -304,12 +304,4 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) } #endif /* CONFIG_SUNRPC_SWAP */ -static inline bool -rpc_task_need_resched(const struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task) || task->tk_callback) - return true; - return false; -} - #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 187d10443a158..1d0395ef62c95 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); - call_reserve(task); } /* @@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; - call_refresh(task); return; } @@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; - call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); - if (rpc_task_need_resched(task)) - return; - 
call_reserveresult(task); } /* @@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); - if (rpc_task_need_resched(task)) - return; - call_refreshresult(task); } /* @@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; - call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); - call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) { - call_encode(task); + if (req->rq_buffer) return; - } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task) status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) { - if (rpc_task_need_resched(task)) - return; - call_encode(task); + if (status == 0) return; - } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_bind; - call_bind(task); + task->tk_action = call_transmit; + /* Check that the connection is OK */ + if (!xprt_bound(task->tk_xprt)) + task->tk_action = call_bind; + else if (!xprt_connected(task->tk_xprt)) + task->tk_action = call_connect; } /* @@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task) { xprt_end_transmit(task); task->tk_action = call_transmit_status; - call_transmit_status(task); } /* @@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task) if (xprt_bound(xprt)) { task->tk_action = call_connect; - call_connect(task); return; } @@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; - call_connect(task); return; } @@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task) if (xprt_connected(xprt)) { task->tk_action = call_transmit; - call_transmit(task); return; } @@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task) case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; - call_transmit(task); return; } rpc_exit(task, status); @@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); } xprt_end_transmit(task); - if (rpc_task_need_resched(task)) - return; - call_transmit_status(task); } /* @@ -2107,9 +2083,6 @@ call_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) { if (task->tk_status == 0) xprt_request_wait_receive(task); - if (rpc_task_need_resched(task)) - return; - call_status(task); return; } @@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task) { xprt_request_enqueue_transmit(task); task->tk_action = call_bc_transmit; - call_bc_transmit(task); } /* @@ -2261,7 +2233,6 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; - call_decode(task); return; } -- GitLab From 99834eead2a04e93a120abb112542b87c42ff5e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Mar 2019 14:24:48 +0100 Subject: [PATCH 1004/1861] clocksource/drivers/npcm: select TIMER_OF When this is disabled, we get a link failure: 
drivers/clocksource/timer-npcm7xx.o: In function `npcm7xx_timer_init': timer-npcm7xx.c:(.init.text+0xf): undefined reference to `timer_of_init' Fixes: 1c00289ecd12 ("clocksource/drivers/npcm: Add NPCM7xx timer driver") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Lezcano --- drivers/clocksource/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 171502a356aa1..4b3d143f0f8a4 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -145,6 +145,7 @@ config VT8500_TIMER config NPCM7XX_TIMER bool "NPCM7xx timer driver" if COMPILE_TEST depends on HAS_IOMEM + select TIMER_OF select CLKSRC_MMIO help Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture, -- GitLab From 9155697e20040658438b89e4ceec415ec125f478 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 5 Mar 2019 12:08:51 -0500 Subject: [PATCH 1005/1861] clocksource/drivers/arm_arch_timer: Remove unneeded pr_fmt macro After commit ded24019b6b6f ("clocksource: arm_arch_timer: clean up printk usage"), the previous macro is redundant, so delete it. And move the new macro to the previous position. Signed-off-by: Yangtao Li Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281cea..ea373cfbcecb5 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -9,7 +9,7 @@ * published by the Free Software Foundation. */ -#define pr_fmt(fmt) "arm_arch_timer: " fmt +#define pr_fmt(fmt) "arch_timer: " fmt #include #include @@ -33,9 +33,6 @@ #include -#undef pr_fmt -#define pr_fmt(fmt) "arch_timer: " fmt - #define CNTTIDR 0x08 #define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) -- GitLab From fbc87aa0f7c429999dc31f1bac3b2615008cac32 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 12 Mar 2019 11:32:56 +0100 Subject: [PATCH 1006/1861] clocksource/drivers/oxnas: Fix OX820 compatible The OX820 compatible is wrong in the driver, fix it. Fixes: 2ea3401e2a84 ("clocksource/drivers/oxnas: Add OX820 compatible") Reported-by: Daniel Golle Signed-off-by: Neil Armstrong Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-oxnas-rps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index eed6feff8b5f2..30c6f4ce672b3 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -296,4 +296,4 @@ err_alloc: TIMER_OF_DECLARE(ox810se_rps, "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); TIMER_OF_DECLARE(ox820_rps, - "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init); + "oxsemi,ox820-rps-timer", oxnas_rps_timer_init); -- GitLab From e94999688e3aa3c0a8ad5a60352cdc3ca3030434 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 11 Apr 2019 20:17:33 +0200 Subject: [PATCH 1007/1861] PM / Domains: Add genpd governor for CPUs After some preceding changes, PM domains managed by genpd may contain CPU devices, so idle state residency values should be taken into account during the state selection process. [The residency value is the minimum amount of time to be spent by a CPU (or a group of CPUs) in an idle state in order to save more energy than could be saved by picking up a shallower idle state.]
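As a worked example of that rule (hypothetical numbers, for illustration only): a domain state with residency_ns = 500000 and power_off_latency_ns = 100000 is only worth entering when every CPU in the domain is expected to stay idle for at least 600000 ns, i.e. the governor applies a test of the form:

	/* sketch of the selection test used by the new governor below */
	if (idle_duration_ns >= state->residency_ns + state->power_off_latency_ns)
		/* a deep enough idle period is expected: the state may be used */

A 2 ms gap until the next timer event satisfies this; a 300 us gap does not.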
For this purpose, add a new genpd governor, pm_domain_cpu_gov, to be used for selecting idle states of PM domains with CPU devices attached either directly or through subdomains. The new governor computes the minimum expected idle duration for all online CPUs attached to a PM domain and its subdomains. Next, it finds the deepest idle state whose target residency is within the expected idle duration and selects it as the target idle state of the domain. It should be noted that the minimum expected idle duration computation is based on the closest timer event information stored in the per-CPU variables cpuidle_devices for all of the CPUs in the domain. That needs to be revisited in future, as obviously there are other reasons why a CPU may be woken up from idle. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 67 +++++++++++++++++++++++++++- include/linux/pm_domain.h | 4 ++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 4d07e38a8247f..7912bc9572447 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include static int dev_update_qos_constraint(struct device *dev, void *data) { @@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; - if (!genpd->max_off_time_changed) + if (!genpd->max_off_time_changed) { + genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; + } /* * We have to invalidate the cached results for the masters, so @@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->state_idx--; } + genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } @@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) return false; } +#ifdef CONFIG_CPU_IDLE +static bool cpu_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct cpuidle_device *dev; + ktime_t domain_wakeup, next_hrtimer; + s64 idle_duration_ns; + int cpu, i; + + /* Validate dev PM QoS constraints. */ + if (!default_power_down_ok(pd)) + return false; + + if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) + return true; + + /* + * Find the next wakeup for any of the online CPUs within the PM domain + * and its subdomains. Note, we only need the genpd->cpus, as it already + * contains a mask of all CPUs from subdomains. + */ + domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); + for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { + dev = per_cpu(cpuidle_devices, cpu); + if (dev) { + next_hrtimer = READ_ONCE(dev->next_hrtimer); + if (ktime_before(next_hrtimer, domain_wakeup)) + domain_wakeup = next_hrtimer; + } + } + + /* The minimum idle duration is from now - until the next wakeup. */ + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + if (idle_duration_ns <= 0) + return false; + + /* + * Find the deepest idle state that has its residency value satisfied + * and by also taking into account the power off latency for the state. + * Start at the state picked by the dev PM QoS constraint validation. 
+ */ + i = genpd->state_idx; + do { + if (idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) { + genpd->state_idx = i; + return true; + } + } while (--i >= 0); + + return false; +} + +struct dev_power_governor pm_domain_cpu_gov = { + .suspend_ok = default_suspend_ok, + .power_down_ok = cpu_power_down_ok, +}; +#endif + struct dev_power_governor simple_qos_governor = { .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a6e251fe9deb7..bc82e74560ee2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -118,6 +118,7 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; bool cached_power_down_ok; + bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -202,6 +203,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; +#ifdef CONFIG_CPU_IDLE +extern struct dev_power_governor pm_domain_cpu_gov; +#endif #else static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) -- GitLab From d7c3a206e6338e4ccdf030719dec028e26a521d5 Mon Sep 17 00:00:00 2001 From: Andy Duan Date: Tue, 9 Apr 2019 03:40:56 +0000 Subject: [PATCH 1008/1861] net: fec: manage ahb clock in runtime pm Some SoCs, like the i.MX6SX, have some clock limitations: - the ahb clock should be disabled before the ipg clock. - the ahb and ipg clocks are required for the MAC MII bus. So, move the ahb clock to runtime management together with the ipg clock. Signed-off-by: Fugang Duan Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fec_main.c | 30 ++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 697c2427f2b70..a96ad20ee4843 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) int ret; if (enable) { - ret = clk_prepare_enable(fep->clk_ahb); - if (ret) - return ret; - ret = clk_prepare_enable(fep->clk_enet_out); if (ret) - goto failed_clk_enet_out; + return ret; if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable) phy_reset_after_clk_enable(ndev->phydev); } else { - clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_enet_out); if (fep->clk_ptp) { mutex_lock(&fep->ptp_clk_mutex); @@ -1885,8 +1880,6 @@ failed_clk_ref: failed_clk_ptp: if (fep->clk_enet_out) clk_disable_unprepare(fep->clk_enet_out); -failed_clk_enet_out: - clk_disable_unprepare(fep->clk_ahb); return ret; } @@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev) ret = clk_prepare_enable(fep->clk_ipg); if (ret) goto failed_clk_ipg; + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + goto failed_clk_ahb; fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { @@ -3563,6 +3559,9 @@ failed_reset: pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); failed_regulator: + clk_disable_unprepare(fep->clk_ahb); +failed_clk_ahb: + clk_disable_unprepare(fep->clk_ipg); failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: @@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); return 0; @@ -3695,8 +3695,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + int ret; - return clk_prepare_enable(fep->clk_ipg); + ret = clk_prepare_enable(fep->clk_ahb); + if (ret) + return ret; + ret = clk_prepare_enable(fep->clk_ipg); + if (ret) + goto failed_clk_ipg; + + return 0; + +failed_clk_ipg: + clk_disable_unprepare(fep->clk_ahb); + return ret; } static const struct dev_pm_ops fec_pm_ops = { -- GitLab From d3706566ae3d92677b932dd156157fd6c72534b1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 9 Apr 2019 19:53:55 +0800 Subject: [PATCH 1009/1861] net: netrom: Fix error cleanup path of nr_proto_init Syzkaller reports this: BUG: unable to handle kernel paging request at fffffbfff830524b PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c9716067 PTE 0 Oops: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__list_add_valid+0x21/0xe0 lib/list_debug.c:23 Code: 8b 0c 24 e9 17 fd ff ff 90 55 48 89 fd 48 8d 7a 08 53 48 89 d3 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 48 83 ec 08 <80> 3c 02 00 0f 85 8b 00 00 00 48 8b 53 08 48 39 f2 75 35 48 89 f2 RSP: 0018:ffff8881ea2278d0 EFLAGS: 00010282 RAX: dffffc0000000000 RBX: ffffffffc1829250 RCX: 1ffff1103d444ef4 RDX: 1ffffffff830524b RSI: ffffffff85659300 RDI: ffffffffc1829258 RBP: ffffffffc1879250 R08: fffffbfff0acb269
R09: fffffbfff0acb269 R10: ffff8881ea2278f0 R11: fffffbfff0acb268 R12: ffffffffc1829250 R13: dffffc0000000000 R14: 0000000000000008 R15: ffffffffc187c830 FS: 00007fe0361df700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff830524b CR3: 00000001eb39a001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __list_add include/linux/list.h:60 [inline] list_add include/linux/list.h:79 [inline] proto_register+0x444/0x8f0 net/core/sock.c:3375 nr_proto_init+0x73/0x4b3 [netrom] ? 0xffffffffc1628000 ? 0xffffffffc1628000 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe0361dec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00007fe0361dec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0361df6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: netrom(+) ax25 fcrypt pcbc af_alg arizona_ldo1 v4l2_common videodev media v4l2_dv_timings hdlc ide_cd_mod snd_soc_sigmadsp_regmap snd_soc_sigmadsp intel_spi_platform intel_spi mtd spi_nor snd_usbmidi_lib usbcore lcd ti_ads7950 hi6421_regulator snd_soc_kbl_rt5663_max98927 snd_soc_hdac_hdmi snd_hda_ext_core snd_hda_core snd_soc_rt5663 snd_soc_core snd_pcm_dmaengine snd_compress snd_soc_rl6231 mac80211 rtc_rc5t583 spi_slave_time leds_pwm hid_gt683r hid industrialio_triggered_buffer kfifo_buf industrialio ir_kbd_i2c rc_core led_class_flash dwc_xlgmac snd_ymfpci gameport snd_mpu401_uart snd_rawmidi snd_ac97_codec snd_pcm ac97_bus snd_opl3_lib snd_timer snd_seq_device snd_hwdep snd soundcore iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel ide_pci_generic piix aesni_intel aes_x86_64 crypto_simd cryptd glue_helper ide_core psmouse input_leds i2c_piix4 serio_raw intel_agp intel_gtt ata_generic agpgart pata_acpi parport_pc rtc_cmos parport floppy sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: rxrpc] Dumping ftrace buffer: (ftrace buffer empty) CR2: fffffbfff830524b ---[ end trace 039ab24b305c4b19 ]--- If nr_proto_init fails, it may forget to call proto_unregister, triggering this issue. This patch rearranges the code of nr_proto_init to avoid such issues. Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S.
Miller --- include/net/netrom.h | 2 +- net/netrom/af_netrom.c | 76 ++++++++++++++++++++++++---------- net/netrom/nr_loopback.c | 2 +- net/netrom/nr_route.c | 2 +- net/netrom/sysctl_net_netrom.c | 5 ++- 5 files changed, 61 insertions(+), 26 deletions(-) diff --git a/include/net/netrom.h b/include/net/netrom.h index 5a0714ff500fd..80f15b1c1a489 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *); int nr_t1timer_running(struct sock *); /* sysctl_net_netrom.c */ -void nr_register_sysctl(void); +int nr_register_sysctl(void); void nr_unregister_sysctl(void); #endif diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d3144d199035..71ffd1a6dc7c6 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void) int i; int rc = proto_register(&nr_proto, 0); - if (rc != 0) - goto out; + if (rc) + return rc; if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n"); - return -1; + pr_err("NET/ROM: %s - nr_ndevs parameter too large\n", + __func__); + rc = -EINVAL; + goto unregister_proto; } dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL); - if (dev_nr == NULL) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); - return -1; + if (!dev_nr) { + pr_err("NET/ROM: %s - unable to allocate device array\n", + __func__); + rc = -ENOMEM; + goto unregister_proto; } for (i = 0; i < nr_ndevs; i++) { @@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void) sprintf(name, "nr%d", i); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); + rc = -ENOMEM; goto fail; } dev->base_addr = i; - if (register_netdev(dev)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n"); + rc = register_netdev(dev); + if (rc) { free_netdev(dev); goto fail; } @@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void) dev_nr[i] = dev; } - if (sock_register(&nr_family_ops)) { - printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n"); + rc = sock_register(&nr_family_ops); + if (rc) goto fail; - } - register_netdevice_notifier(&nr_dev_notifier); + rc = register_netdevice_notifier(&nr_dev_notifier); + if (rc) + goto out_sock; ax25_register_pid(&nr_pid); ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL - nr_register_sysctl(); + rc = nr_register_sysctl(); + if (rc) + goto out_sysctl; #endif nr_loopback_init(); - proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops); - proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops); - proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops); -out: - return rc; + rc = -ENOMEM; + if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops)) + goto proc_remove1; + if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net, + &nr_neigh_seqops)) + goto proc_remove2; + if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net, + &nr_node_seqops)) + goto proc_remove3; + + return 0; + +proc_remove3: + remove_proc_entry("nr_neigh", init_net.proc_net); +proc_remove2: + remove_proc_entry("nr", init_net.proc_net); +proc_remove1: + + nr_loopback_clear(); + nr_rt_free(); + +#ifdef CONFIG_SYSCTL + nr_unregister_sysctl(); +out_sysctl: +#endif + ax25_linkfail_release(&nr_linkfail_notifier); + ax25_protocol_release(AX25_P_NETROM); + 
unregister_netdevice_notifier(&nr_dev_notifier); +out_sock: + sock_unregister(PF_NETROM); fail: while (--i >= 0) { unregister_netdev(dev_nr[i]); free_netdev(dev_nr[i]); } kfree(dev_nr); +unregister_proto: proto_unregister(&nr_proto); - rc = -1; - goto out; + return rc; } module_init(nr_proto_init); diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 215ad22a96476..93d13f0199813 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused) } } -void __exit nr_loopback_clear(void) +void nr_loopback_clear(void) { del_timer_sync(&loopback_timer); skb_queue_purge(&loopback_queue); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6485f593e2f09..b76aa668a94bc 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = { /* * Free all memory associated with the nodes and routes lists. */ -void __exit nr_rt_free(void) +void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index ba1c368b3f186..771011b84270e 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = { { } }; -void __init nr_register_sysctl(void) +int __init nr_register_sysctl(void) { nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table); + if (!nr_table_header) + return -ENOMEM; + return 0; } void nr_unregister_sysctl(void) -- GitLab From a5f622984a623df9a84cf43f6b098d8dd76fbe05 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 9 Apr 2019 14:23:10 -0700 Subject: [PATCH 1010/1861] selftests: fib_tests: Fix 'Command line is not complete' errors A couple of tests are verifying a route has been removed. The helper expects the prefix as the first part of the expected output. When checking that a route has been deleted the prefix is empty leading to an invalid ip command: $ ip ro ls match Command line is not complete. Try option "help" Fix by moving the comparison of expected output and output to a new function that is used by both check_route and check_route6. Use the new helper for the 2 checks on route removal. Also, remove the reset of 'set -x' in route_setup which overrides the user managed setting. Fixes: d69faad76584c ("selftests: fib_tests: Add prefix route tests with metric") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 94 ++++++++++-------------- 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 1080ff55a788f..0d2a5f4f1e638 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -605,6 +605,39 @@ run_cmd() return $rc } +check_expected() +{ + local out="$1" + local expected="$2" + local rc=0 + + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. 
Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route6() @@ -652,31 +685,7 @@ check_route6() pfx=$1 out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } route_cleanup() @@ -725,7 +734,7 @@ route_setup() ip -netns ns2 addr add 172.16.103.2/24 dev veth4 ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 - set +ex + set +e } # assumption is that basic add of a single path route works @@ -960,7 +969,8 @@ ipv6_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route6 "" + out=$($IP -6 ro ls match 2001:db8:104::/64) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" @@ -1091,38 +1101,13 @@ check_route() local pfx local expected="$1" local out - local rc=0 set -- $expected pfx=$1 [ "${pfx}" = "unreachable" ] && pfx=$2 out=$($IP ro ls match ${pfx}) - [ "${out}" = "${expected}" ] && return 0 - - if [ -z "${out}" ]; then - if [ "$VERBOSE" = "1" ]; then - printf "\nNo route entry found\n" - printf "Expected:\n" - printf " ${expected}\n" - fi - return 1 - fi - - # tricky way to convert output to 1-line without ip's - # messy '\'; this drops all extra white space - out=$(echo ${out}) - if [ "${out}" != "${expected}" ]; then - rc=1 - if [ "${VERBOSE}" = "1" ]; then - printf " Unexpected route entry. Have:\n" - printf " ${out}\n" - printf " Expected:\n" - printf " ${expected}\n\n" - fi - fi - - return $rc + check_expected "${out}" "${expected}" } # assumption is that basic add of a single path route works @@ -1387,7 +1372,8 @@ ipv4_addr_metric_test() run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then - check_route "" + out=$($IP ro ls match 172.16.104.0/24) + check_expected "${out}" "" rc=$? fi log_test $rc 0 "Prefix route removed on link down" -- GitLab From a89afe58f1a74aac768a5eb77af95ef4ee15beaa Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 12 Apr 2019 10:09:16 +0800 Subject: [PATCH 1011/1861] block: fix the return errno for direct IO If the last bio returned is not dio->bio, the status of the bio will not be assigned to dio->bio if it is an error. This will cause the whole IO status to be wrong. ksoftirqd/21-117 [021] ..s. 4017.966090: 8,0 C N 4883648 [0] -0 [018] ..s. 4017.970888: 8,0 C WS 4924800 + 1024 [0] -0 [018] ..s. 4017.970909: 8,0 D WS 4935424 + 1024 [] -0 [018] ..s. 4017.970924: 8,0 D WS 4936448 + 321 [] ksoftirqd/21-117 [021] ..s. 4017.995033: 8,0 C R 4883648 + 336 [65475] ksoftirqd/21-117 [021] d.s. 4018.001988: myprobe1: (blkdev_bio_end_io+0x0/0x168) bi_status=7 ksoftirqd/21-117 [021] d.s.
4018.001992: myprobe: (aio_complete_rw+0x0/0x148) x0=0xffff802f2595ad80 res=0x12a000 res2=0x0 We always have to assign bio->bi_status to dio->bio.bi_status because we will only check dio->bio.bi_status when we return the whole IO to the upper layer. Fixes: 542ff7bf18c6 ("block: new direct I/O implementation") Cc: stable@vger.kernel.org Cc: Christoph Hellwig Cc: Jens Axboe Reviewed-by: Ming Lei Signed-off-by: Jason Yan Signed-off-by: Jens Axboe --- fs/block_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 78d3257435c00..24615c76c1d0e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -307,10 +307,10 @@ static void blkdev_bio_end_io(struct bio *bio) struct blkdev_dio *dio = bio->bi_private; bool should_dirty = dio->should_dirty; - if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { - if (bio->bi_status && !dio->bio.bi_status) - dio->bio.bi_status = bio->bi_status; - } else { + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; + + if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { if (!dio->is_sync) { struct kiocb *iocb = dio->iocb; ssize_t ret; -- GitLab From e3058450965972e67cc0e5492c08c4cdadafc134 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Apr 2019 05:55:23 -0700 Subject: [PATCH 1012/1861] dctcp: more accurate tracking of packets delivery After commit e21db6f69a95 ("tcp: track total bytes delivered with ECN CE marks") core TCP stack does a very good job tracking ECN signals. The "sender's best estimate of CE information" Yuchung mentioned in his patch is indeed the best we can do. DCTCP can use tp->delivered_ce and tp->delivered to not duplicate the logic, and use the existing best estimate. This solves some problems, since current DCTCP logic does not deal with losses and/or GRO or ack aggregation very well. This also removes a dubious use of inet_csk(sk)->icsk_ack.rcv_mss (this should have been tp->mss_cache), and a 64 bit divide. Finally, we can see that the DCTCP logic, calling dctcp_update_alpha() for every ACK could be done differently, calling it only once per RTT. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Soheil Hassas Yeganeh Cc: Florian Westphal Cc: Daniel Borkmann Cc: Lawrence Brakmo Cc: Abdul Kabbani Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_dctcp.c | 45 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 359da68d7c062..477cb4aa456c1 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -49,9 +49,8 @@ #define DCTCP_MAX_ALPHA 1024U struct dctcp { - u32 acked_bytes_ecn; - u32 acked_bytes_total; - u32 prior_snd_una; + u32 old_delivered; + u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; @@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; - ca->acked_bytes_ecn = 0; - ca->acked_bytes_total = 0; + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; } static void dctcp_init(struct sock *sk) @@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk) sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); - ca->prior_snd_una = tp->snd_una; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); @@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; - - /* If ack did not advance snd_una, count dupack as MSS size. - * If ack did update window, do not count it at all. - */ - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; - if (acked_bytes) { - ca->acked_bytes_total += acked_bytes; - ca->prior_snd_una = tp->snd_una; - - if (flags & CA_ACK_ECE) - ca->acked_bytes_ecn += acked_bytes; - } /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { - u64 bytes_ecn = ca->acked_bytes_ecn; + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (bytes_ecn) { + if (delivered_ce) { + u32 delivered = tp->delivered - ca->old_delivered; + /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 Mbytes. + * after 8 M packets. */ - bytes_ecn <<= (10 - dctcp_shift_g); - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. 
@@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_ab_ecn = tp->mss_cache * + (tp->delivered_ce - ca->old_delivered_ce); + info->dctcp.dctcp_ab_tot = tp->mss_cache * + (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; -- GitLab From d15d9fd02575ecfada92d42f655940c4f10af842 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 20 Feb 2019 07:52:31 +0000 Subject: [PATCH 1013/1861] drm: bridge: dw-hdmi: Fix overflow workaround for Rockchip SoCs The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have also been identified as needing this workaround with a single iteration. Fixes: be41fc55f1aa ("drm: bridge: dw-hdmi: Handle overflow workaround based on device version") Signed-off-by: Jonas Karlman Tested-by: Heiko Stueber Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/AM3PR03MB0966818FAAAE6192FF4ED11AAC7D0@AM3PR03MB0966.eurprd03.prod.outlook.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index db761329a1e3e..b74ccb6a24c2b 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1800,6 +1800,8 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) * iteration for others. * The Amlogic Meson GX SoCs (v2.01a) have been identified as needing * the workaround with a single iteration. + * The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have + * been identified as needing the workaround with a single iteration. */ switch (hdmi->version) { @@ -1808,7 +1810,9 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi) break; case 0x131a: case 0x132a: + case 0x200a: case 0x201a: + case 0x211a: case 0x212a: count = 1; break; -- GitLab From 1d54ad944074010609562da5c89e4f5df2f4e5db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Apr 2019 15:03:00 +0200 Subject: [PATCH 1014/1861] perf/core: Fix perf_event_disable_inatomic() race Thomas-Mich Richter reported he triggered a WARN()ing from event_function_local() on his s390. The problem boils down to: CPU-A CPU-B perf_event_overflow() perf_event_disable_inatomic() @pending_disable = 1 irq_work_queue(); sched-out event_sched_out() @pending_disable = 0 sched-in perf_event_overflow() perf_event_disable_inatomic() @pending_disable = 1; irq_work_queue(); // FAILS irq_work_run() perf_pending_event() if (@pending_disable) perf_event_disable_local(); // WHOOPS The problem exists in generic code, but s390 is particularly sensitive because it doesn't implement arch_irq_work_raise(), nor does it call irq_work_run() from its PMU interrupt handler (nor would that be sufficient in this case, because s390 also generates perf_event_overflow() from pmu::stop). Add to that the fact that s390 is a virtual architecture and (virtual) CPU-A can stall long enough for the above race to happen, even if it would self-IPI. Adding an irq_work_sync() to event_sched_in() would work for all hardware PMUs that properly use irq_work_run() but fails for software PMUs. Instead, encode the CPU number in @pending_disable, such that we can tell which CPU requested the disable. This then allows us to detect the above scenario and even redirect the IPI to make up for the failed queue.
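In other words, @pending_disable changes from a boolean into a CPU number, with -1 meaning "no disable pending", and the irq_work handler takes the shape of the perf_pending_event_disable() hunk below:

	int cpu = READ_ONCE(event->pending_disable);

	if (cpu < 0)
		return;		/* nothing pending */
	if (cpu == smp_processor_id()) {
		WRITE_ONCE(event->pending_disable, -1);
		perf_event_disable_local(event);
	} else {
		/* the event was scheduled in elsewhere meanwhile:
		 * re-raise the irq_work on the CPU that now owns it
		 */
		irq_work_queue_on(&event->pending, cpu);
	}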
This then allows us to detect the above scenario and even redirect the IPI to make up for the failed queue. Reported-by: Thomas-Mich Richter Tested-by: Thomas Richter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/events/core.c | 52 ++++++++++++++++++++++++++++++------- kernel/events/ring_buffer.c | 4 +-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 72d06e302e993..534e01e7bc368 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event, event->pmu->del(event, 0); event->oncpu = -1; - if (event->pending_disable) { - event->pending_disable = 0; + if (READ_ONCE(event->pending_disable) >= 0) { + WRITE_ONCE(event->pending_disable, -1); state = PERF_EVENT_STATE_OFF; } perf_event_set_state(event, state); @@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable); void perf_event_disable_inatomic(struct perf_event *event) { - event->pending_disable = 1; + WRITE_ONCE(event->pending_disable, smp_processor_id()); + /* can fail, see perf_pending_event_disable() */ irq_work_queue(&event->pending); } @@ -5810,10 +5811,45 @@ void perf_event_wakeup(struct perf_event *event) } } +static void perf_pending_event_disable(struct perf_event *event) +{ + int cpu = READ_ONCE(event->pending_disable); + + if (cpu < 0) + return; + + if (cpu == smp_processor_id()) { + WRITE_ONCE(event->pending_disable, -1); + perf_event_disable_local(event); + return; + } + + /* + * CPU-A CPU-B + * + * perf_event_disable_inatomic() + * @pending_disable = CPU-A; + * irq_work_queue(); + * + * sched-out + * @pending_disable = -1; + * + * sched-in + * perf_event_disable_inatomic() + * @pending_disable = CPU-B; + * irq_work_queue(); // FAILS + * + * irq_work_run() + * perf_pending_event() + * + * But the event runs on CPU-B and wants disabling there. + */ + irq_work_queue_on(&event->pending, cpu); +} + static void perf_pending_event(struct irq_work *entry) { - struct perf_event *event = container_of(entry, - struct perf_event, pending); + struct perf_event *event = container_of(entry, struct perf_event, pending); int rctx; rctx = perf_swevent_get_recursion_context(); @@ -5822,10 +5858,7 @@ static void perf_pending_event(struct irq_work *entry) * and we won't recurse 'further'. 
*/ - if (event->pending_disable) { - event->pending_disable = 0; - perf_event_disable_local(event); - } + perf_pending_event_disable(event); if (event->pending_wakeup) { event->pending_wakeup = 0; @@ -10236,6 +10269,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, init_waitqueue_head(&event->waitq); + event->pending_disable = -1; init_irq_work(&event->pending, perf_pending_event); mutex_init(&event->mmap_mutex); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a4047321d7d80..2545ac08cc77b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = 1; + event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); local_set(&rb->aux_nest, 0); goto err_put; @@ -480,7 +480,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = 1; + handle->event->pending_disable = smp_processor_id(); perf_output_wakeup(handle); } -- GitLab From 837f74116585dcd235fae1696e1e1471b6bb9e01 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 9 Apr 2019 17:16:59 +0200 Subject: [PATCH 1015/1861] xfrm: update doc about xfrm[46]_gc_thresh Those entries are not used anymore. CC: Florian Westphal Fixes: 09c7570480f7 ("xfrm: remove flow cache") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- Documentation/networking/ip-sysctl.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index acdfb5d2bcaa4..f1843b1324535 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1336,6 +1336,7 @@ tag - INTEGER Default value is 0. xfrm4_gc_thresh - INTEGER + (Obsolete since linux-4.14) The threshold at which we will start garbage collecting for IPv4 destination cache entries. At twice this value the system will refuse new allocations. @@ -1919,6 +1920,7 @@ echo_ignore_all - BOOLEAN Default: 0 xfrm6_gc_thresh - INTEGER + (Obsolete since linux-4.14) The threshold at which we will start garbage collecting for IPv6 destination cache entries. At twice this value the system will refuse new allocations. -- GitLab From 13e962140be671f31a011543f11477af67a6c33e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 2 Apr 2019 21:38:32 +0800 Subject: [PATCH 1016/1861] ACPI: button: reinitialize button state upon resume With commit dfa46c50f65b ("ACPI / button: Fix an issue in button.lid_init_state=ignore mode"), the lid device is considered to be not compliant to SW_LID if the Lid state is unchanged when updating it. This is not wrong, but we overlooked the resume case, where Lid state is updated unconditionally in the button driver .resume() callback. And this results in warning message "ACPI: button: The lid device is not compliant to SW_LID." after resume, if the machine is suspended with Lid opened and then resumed with Lid opened. Fix this by flushing the cached lid state before updating the Lid device in .resume() callback. Fixes: dfa46c50f65b ("ACPI / button: Fix an issue in button.lid_init_state=ignore mode") Reported-and-tested-by: Zhao Lijian Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/button.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index a19ff3977ac4a..623998a8d722b 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -456,8 +456,11 @@ static int acpi_button_resume(struct device *dev) struct acpi_button *button = acpi_driver_data(device); button->suspended = false; - if (button->type == ACPI_BUTTON_TYPE_LID && button->input->users) + if (button->type == ACPI_BUTTON_TYPE_LID && button->input->users) { + button->last_state = !!acpi_lid_evaluate_state(device); + button->last_time = ktime_get(); acpi_lid_initialize_state(device); + } return 0; } #endif -- GitLab From e720a6c8fbdb86dcbb493d432e632dfafe6381cc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:21 +0200 Subject: [PATCH 1017/1861] drivers: firmware: psci: Move psci to separate directory Some of the following changes extend the PSCI driver with additional files. To avoid further cluttering the top-level firmware directory, first move the PSCI files into a PSCI sub-directory. Suggested-by: Mark Rutland Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/firmware/Kconfig | 15 +-------------- drivers/firmware/Makefile | 3 +-- drivers/firmware/psci/Kconfig | 13 +++++++++++++ drivers/firmware/psci/Makefile | 4 ++++ drivers/firmware/{ => psci}/psci.c | 0 drivers/firmware/{ => psci}/psci_checker.c | 0 6 files changed, 19 insertions(+), 16 deletions(-) create mode 100644 drivers/firmware/psci/Kconfig create mode 100644 drivers/firmware/psci/Makefile rename drivers/firmware/{ => psci}/psci.c (100%) rename drivers/firmware/{ => psci}/psci_checker.c (100%) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index cac16c4b0df3e..7b655f6156fba 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -5,20 +5,6 @@ menu "Firmware Drivers" -config ARM_PSCI_FW - bool - -config ARM_PSCI_CHECKER - bool "ARM PSCI checker" - depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST - help - Run the PSCI checker during startup. This checks that hotplug and - suspend operations work correctly when using PSCI. - - The torture tests may interfere with the PSCI checker by turning CPUs - on and off through hotplug, so for now torture tests and PSCI checker - are mutually exclusive. - config ARM_SCMI_PROTOCOL bool "ARM System Control and Management Interface (SCMI) Message Protocol" depends on ARM || ARM64 || COMPILE_TEST @@ -270,6 +256,7 @@ config TI_SCI_PROTOCOL config HAVE_ARM_SMCCC bool +source "drivers/firmware/psci/Kconfig" source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 80feb635120fc..9a3909a226820 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -2,8 +2,6 @@ # # Makefile for the linux kernel.
# -obj-$(CONFIG_ARM_PSCI_FW) += psci.o -obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o @@ -25,6 +23,7 @@ CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQU obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o obj-$(CONFIG_ARM_SCMI_PROTOCOL) += arm_scmi/ +obj-y += psci/ obj-y += broadcom/ obj-y += meson/ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ diff --git a/drivers/firmware/psci/Kconfig b/drivers/firmware/psci/Kconfig new file mode 100644 index 0000000000000..26a3b32bf7aba --- /dev/null +++ b/drivers/firmware/psci/Kconfig @@ -0,0 +1,13 @@ +config ARM_PSCI_FW + bool + +config ARM_PSCI_CHECKER + bool "ARM PSCI checker" + depends on ARM_PSCI_FW && HOTPLUG_CPU && CPU_IDLE && !TORTURE_TEST + help + Run the PSCI checker during startup. This checks that hotplug and + suspend operations work correctly when using PSCI. + + The torture tests may interfere with the PSCI checker by turning CPUs + on and off through hotplug, so for now torture tests and PSCI checker + are mutually exclusive. diff --git a/drivers/firmware/psci/Makefile b/drivers/firmware/psci/Makefile new file mode 100644 index 0000000000000..1956b882470f1 --- /dev/null +++ b/drivers/firmware/psci/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +# +obj-$(CONFIG_ARM_PSCI_FW) += psci.o +obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci/psci.c similarity index 100% rename from drivers/firmware/psci.c rename to drivers/firmware/psci/psci.c diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci/psci_checker.c similarity index 100% rename from drivers/firmware/psci_checker.c rename to drivers/firmware/psci/psci_checker.c -- GitLab From f0f6ad9092601d95729ce14ec6da3b5055e18714 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:22 +0200 Subject: [PATCH 1018/1861] MAINTAINERS: Update files for PSCI The files for the PSCI firmware driver were moved to a sub-directory, so update MAINTAINERS to reflect that. Suggested-by: Mark Rutland Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2359e12e4c41e..4c7136581e520 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12415,7 +12415,7 @@ M: Mark Rutland M: Lorenzo Pieralisi L: linux-arm-kernel@lists.infradead.org S: Maintained -F: drivers/firmware/psci*.c +F: drivers/firmware/psci/ F: include/linux/psci.h F: include/uapi/linux/psci.h -- GitLab From 0865d20c50741e1d37d4a1108764e5e45a83973e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:23 +0200 Subject: [PATCH 1019/1861] drivers: firmware: psci: Split psci_dt_cpu_init_idle() Split the psci_dt_cpu_init_idle() function into two functions. This makes the code clearer and provides better re-usability. Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Acked-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. 
Wysocki --- drivers/firmware/psci/psci.c | 42 ++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index c80ec1d032744..9788bfc1cf8b9 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -270,9 +270,26 @@ static int __init psci_features(u32 psci_func_id) #ifdef CONFIG_CPU_IDLE static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); +static int psci_dt_parse_state_node(struct device_node *np, u32 *state) +{ + int err = of_property_read_u32(np, "arm,psci-suspend-param", state); + + if (err) { + pr_warn("%pOF missing arm,psci-suspend-param property\n", np); + return err; + } + + if (!psci_power_state_is_valid(*state)) { + pr_warn("Invalid PSCI power state %#x\n", *state); + return -EINVAL; + } + + return 0; +} + static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) { - int i, ret, count = 0; + int i, ret = 0, count = 0; u32 *psci_states; struct device_node *state_node; @@ -291,29 +308,16 @@ static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) return -ENOMEM; for (i = 0; i < count; i++) { - u32 state; - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + ret = psci_dt_parse_state_node(state_node, &psci_states[i]); + of_node_put(state_node); - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %pOF missing arm,psci-suspend-param property\n", - state_node); - of_node_put(state_node); + if (ret) goto free_mem; - } - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; - goto free_mem; - } - psci_states[i] = state; + pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); } + /* Idle states parsed correctly, initialize per-cpu pointer */ per_cpu(psci_power_state, cpu) = psci_states; return 0; -- GitLab From d036b5cfef6360808117abd0c53775b7a8981a2e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:24 +0200 Subject: [PATCH 1020/1861] drivers: firmware: psci: Simplify error path of psci_dt_init() Instead of having each PSCI init function taking care of the of_node_put(), deal with that from psci_dt_init(), as this enables a bit simpler error path for each PSCI init function. Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Acked-by: Mark Rutland Reviewed-by: Daniel Lezcano Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. 
Wysocki --- drivers/firmware/psci/psci.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 9788bfc1cf8b9..e480e0af632c1 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -609,9 +609,9 @@ static int __init psci_0_2_init(struct device_node *np) int err; err = get_set_conduit_method(np); - if (err) - goto out_put_node; + return err; + /* * Starting with v0.2, the PSCI specification introduced a call * (PSCI_VERSION) that allows probing the firmware version, so @@ -619,11 +619,7 @@ static int __init psci_0_2_init(struct device_node *np) * can be carried out according to the specific version reported * by firmware */ - err = psci_probe(); - -out_put_node: - of_node_put(np); - return err; + return psci_probe(); } /* @@ -635,9 +631,8 @@ static int __init psci_0_1_init(struct device_node *np) int err; err = get_set_conduit_method(np); - if (err) - goto out_put_node; + return err; pr_info("Using PSCI v0.1 Function IDs from DT\n"); @@ -661,9 +656,7 @@ static int __init psci_0_1_init(struct device_node *np) psci_ops.migrate = psci_migrate; } -out_put_node: - of_node_put(np); - return err; + return 0; } static const struct of_device_id psci_of_match[] __initconst = { @@ -678,6 +671,7 @@ int __init psci_dt_init(void) struct device_node *np; const struct of_device_id *matched_np; psci_initcall_t init_fn; + int ret; np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); @@ -685,7 +679,10 @@ int __init psci_dt_init(void) return -ENODEV; init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); + ret = init_fn(np); + + of_node_put(np); + return ret; } #ifdef CONFIG_ACPI -- GitLab From 60dd1ead65e83268af9ccd19b97c7011cb50186b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:25 +0200 Subject: [PATCH 1021/1861] drivers: firmware: psci: Announce support for OS initiated suspend mode PSCI firmware v1.0+, supports two different modes for CPU_SUSPEND. The Platform Coordinated mode, which is the default and mandatory mode, while support for the OS initiated (OSI) mode is optional. In some cases it's interesting for the user/developer to know if the OSI mode is supported by the PSCI FW, so print a message to the log if that is the case. Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Reviewed-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 21 ++++++++++++++++++++- include/uapi/linux/psci.h | 5 +++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e480e0af632c1..eabd01383cd6e 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -95,6 +95,11 @@ static inline bool psci_has_ext_power_state(void) PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK; } +static inline bool psci_has_osi_support(void) +{ + return psci_cpu_suspend_feature & PSCI_1_0_OS_INITIATED; +} + static inline bool psci_power_state_loses_context(u32 state) { const u32 mask = psci_has_ext_power_state() ? 
@@ -659,10 +664,24 @@ static int __init psci_0_1_init(struct device_node *np) return 0; } +static int __init psci_1_0_init(struct device_node *np) +{ + int err; + + err = psci_0_2_init(np); + if (err) + return err; + + if (psci_has_osi_support()) + pr_info("OSI mode supported.\n"); + + return 0; +} + static const struct of_device_id psci_of_match[] __initconst = { { .compatible = "arm,psci", .data = psci_0_1_init}, { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - { .compatible = "arm,psci-1.0", .data = psci_0_2_init}, + { .compatible = "arm,psci-1.0", .data = psci_1_0_init}, {}, }; diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index b3bcabe380da8..581f72085c334 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -49,6 +49,7 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) +#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) @@ -97,6 +98,10 @@ #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \ (0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT) +#define PSCI_1_0_OS_INITIATED BIT(0) +#define PSCI_1_0_SUSPEND_MODE_PC 0 +#define PSCI_1_0_SUSPEND_MODE_OSI 1 + /* PSCI return values (inclusive of all PSCI versions) */ #define PSCI_RET_SUCCESS 0 #define PSCI_RET_NOT_SUPPORTED -1 -- GitLab From dc351d4c5f4fe4d0f274d6d660227be0c3a03317 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 11:55:16 +0200 Subject: [PATCH 1022/1861] PM / core: Propagate dev->power.wakeup_path when no callbacks The dev->power.direct_complete flag may become set in device_prepare() in case the device doesn't have any PM callbacks (dev->power.no_pm_callbacks is set). This leads to broken behaviour when a child has wakeup enabled and relies on its parent to be used in the wakeup path. More precisely, when the direct complete path is selected for the child in __device_suspend(), the propagation of dev->power.wakeup_path is skipped as well. Let's address this problem by checking whether the device is part of the wakeup path or has wakeup enabled, and in that case prevent the direct complete path from being used. Reported-by: Loic Pallardy Signed-off-by: Ulf Hansson [ rjw: Comment cleanup ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 9b8c829798f19..43e863cc0c1b8 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1736,6 +1736,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + /* Avoid direct_complete to let wakeup_path propagate. */ + if (device_may_wakeup(dev) || dev->power.wakeup_path) + dev->power.direct_complete = false; + if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); -- GitLab From f16628d6e8c6616b071ffe775908b95e07404cab Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 30 Mar 2019 09:20:16 +0100 Subject: [PATCH 1023/1861] clang-format: Update with the latest for_each macro list Re-run the shell fragment that generated the original list now that there are two dozen new entries after v5.1's merge window.
Signed-off-by: Miguel Ojeda --- .clang-format | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.clang-format b/.clang-format index f49620f506f17..f3923a1f98583 100644 --- a/.clang-format +++ b/.clang-format @@ -78,6 +78,8 @@ ForEachMacros: - 'ata_qc_for_each_with_internal' - 'ax25_for_each' - 'ax25_uid_for_each' + - '__bio_for_each_bvec' + - 'bio_for_each_bvec' - 'bio_for_each_integrity_vec' - '__bio_for_each_segment' - 'bio_for_each_segment' @@ -118,10 +120,12 @@ ForEachMacros: - 'drm_for_each_legacy_plane' - 'drm_for_each_plane' - 'drm_for_each_plane_mask' + - 'drm_for_each_privobj' - 'drm_mm_for_each_hole' - 'drm_mm_for_each_node' - 'drm_mm_for_each_node_in_range' - 'drm_mm_for_each_node_safe' + - 'flow_action_for_each' - 'for_each_active_drhd_unit' - 'for_each_active_iommu' - 'for_each_available_child_of_node' @@ -158,6 +162,9 @@ ForEachMacros: - 'for_each_dss_dev' - 'for_each_efi_memory_desc' - 'for_each_efi_memory_desc_in_map' + - 'for_each_element' + - 'for_each_element_extid' + - 'for_each_element_id' - 'for_each_endpoint_of_node' - 'for_each_evictable_lru' - 'for_each_fib6_node_rt_rcu' @@ -195,6 +202,7 @@ ForEachMacros: - 'for_each_net_rcu' - 'for_each_new_connector_in_state' - 'for_each_new_crtc_in_state' + - 'for_each_new_mst_mgr_in_state' - 'for_each_new_plane_in_state' - 'for_each_new_private_obj_in_state' - 'for_each_node' @@ -210,8 +218,10 @@ ForEachMacros: - 'for_each_of_pci_range' - 'for_each_old_connector_in_state' - 'for_each_old_crtc_in_state' + - 'for_each_old_mst_mgr_in_state' - 'for_each_oldnew_connector_in_state' - 'for_each_oldnew_crtc_in_state' + - 'for_each_oldnew_mst_mgr_in_state' - 'for_each_oldnew_plane_in_state' - 'for_each_oldnew_plane_in_state_reverse' - 'for_each_oldnew_private_obj_in_state' @@ -243,6 +253,9 @@ ForEachMacros: - 'for_each_sg_dma_page' - 'for_each_sg_page' - 'for_each_sibling_event' + - 'for_each_subelement' + - 'for_each_subelement_extid' + - 'for_each_subelement_id' - '__for_each_thread' - 'for_each_thread' - 'for_each_zone' @@ -252,6 +265,8 @@ ForEachMacros: - 'fwnode_for_each_child_node' - 'fwnode_graph_for_each_endpoint' - 'gadget_for_each_ep' + - 'genradix_for_each' + - 'genradix_for_each_from' - 'hash_for_each' - 'hash_for_each_possible' - 'hash_for_each_possible_rcu' @@ -293,7 +308,11 @@ ForEachMacros: - 'key_for_each' - 'key_for_each_safe' - 'klp_for_each_func' + - 'klp_for_each_func_safe' + - 'klp_for_each_func_static' - 'klp_for_each_object' + - 'klp_for_each_object_safe' + - 'klp_for_each_object_static' - 'kvm_for_each_memslot' - 'kvm_for_each_vcpu' - 'list_for_each' @@ -324,6 +343,8 @@ ForEachMacros: - 'media_device_for_each_intf' - 'media_device_for_each_link' - 'media_device_for_each_pad' + - 'mp_bvec_for_each_page' + - 'mp_bvec_for_each_segment' - 'nanddev_io_for_each_page' - 'netdev_for_each_lower_dev' - 'netdev_for_each_lower_private' @@ -375,6 +396,7 @@ ForEachMacros: - 'rht_for_each_rcu' - 'rht_for_each_rcu_continue' - '__rq_for_each_bio' + - 'rq_for_each_bvec' - 'rq_for_each_segment' - 'scsi_for_each_prot_sg' - 'scsi_for_each_sg' @@ -410,6 +432,8 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' - 'xa_for_each' + - 'xa_for_each_marked' + - 'xa_for_each_start' - 'xas_for_each' - 'xas_for_each_conflict' - 'xas_for_each_marked' -- GitLab From 3c677d206210f53a4be972211066c0f1cd47fe12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Apr 2019 12:50:31 +0200 Subject: [PATCH 1024/1861] iommu/amd: Set exclusion range correctly The exlcusion range limit register 
needs to contain the base-address of the last page that is part of the range, as bits 0-11 of this register are treated as 0xfff by the hardware for comparisons. So correctly set the exclusion range in the hardware to the last page which is _in_ the range. Fixes: b2026aa2dce44 ('x86, AMD IOMMU: add functions for programming IOMMU MMIO space') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 1b1378619fc9e..ff40ba758cf36 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -359,7 +359,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) static void iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; - u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; u64 entry; if (!iommu->exclusion_start) -- GitLab From 045afc24124d80c6998d9c770844c67912083506 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 12:45:09 +0100 Subject: [PATCH 1025/1861] arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value Rather embarrassingly, our futex() FUTEX_WAKE_OP implementation doesn't explicitly set the return value on the non-faulting path and instead leaves it holding the result of the underlying atomic operation. This means that any FUTEX_WAKE_OP atomic operation which computes a non-zero value will be reported as having failed. Regrettably, I wrote the buggy code back in 2011 and it was upstreamed as part of the initial arm64 support in 2012. The reasons we appear to get away with this are: 1. FUTEX_WAKE_OP is rarely used and therefore doesn't appear to get exercised by futex() test applications 2. If the result of the atomic operation is zero, the system call behaves correctly 3. Prior to version 2.25, the only operation used by GLIBC set the futex to zero, and therefore worked as expected. From 2.25 onwards, FUTEX_WAKE_OP is not used by GLIBC at all. Fix the implementation by ensuring that the return value is either 0 to indicate that the atomic operation completed successfully, or -EFAULT if we encountered a fault when accessing the user mapping. 
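To make the failure mode concrete, a minimal user-space probe might look like the sketch below. This is an illustration only, not part of the patch: it assumes a Linux host, the FUTEX_OP() encoding from <linux/futex.h>, and no waiters queued on either futex word.

#define _GNU_SOURCE
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        uint32_t f1 = 0, f2 = 0;

        /* FUTEX_OP_ADD stores 0 + (-1) into f2, so the atomic op computes
         * a non-zero value (the kernel sign-extends the 12-bit oparg).
         * The comparison (oldval 0 < 0) is false, so no wake on f2. */
        long ret = syscall(SYS_futex, &f1, FUTEX_WAKE_OP, 1, (void *)1UL,
                           &f2, FUTEX_OP(FUTEX_OP_ADD, -1, FUTEX_OP_CMP_LT, 0));

        /* With no waiters queued, a correct kernel returns 0 here; with
         * the bug, the leaked atomic result makes the call misbehave,
         * typically surfacing as a spurious failure. */
        printf("FUTEX_WAKE_OP returned %ld\n", ret);
        return 0;
}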
Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index cccb83ad7fa8e..e1d95f08f8e12 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,8 +30,8 @@ do { \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ -"2: stlxr %w3, %w0, %2\n" \ -" cbnz %w3, 1b\n" \ +"2: stlxr %w0, %w3, %2\n" \ +" cbnz %w0, 1b\n" \ " dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ @@ -50,30 +50,30 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval = 0, ret, tmp; + int oldval, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w0, %w4", + __futex_atomic_op("mov %w3, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w0, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w0, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w0, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w4", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w0, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; default: -- GitLab From a823c35ff2eda73046cc1847326071de350fceda Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 12 Apr 2019 23:22:01 +0900 Subject: [PATCH 1026/1861] arm64: ptrace: Add function argument access API Add regs_get_argument() which returns N th argument of the function call. On arm64, it supports up to 8th argument. Note that this chooses most probably assignment, in some case it can be incorrect (e.g. passing data structure or floating point etc.) This enables ftrace kprobe events to access kernel function arguments via $argN syntax. Signed-off-by: Masami Hiramatsu [will: tidied up the comment a bit] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/ptrace.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 555af50355920..c383625ec02c9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -148,6 +148,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index ec60174c8c184..b2de32939ada8 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -305,6 +305,28 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->regs[0]; } +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * regs_get_argument() returns @n th argument of the function call. + * + * Note that this chooses the most likely register mapping. In very rare + * cases this may not return correct data, for example, if one of the + * function parameters is 16 bytes or bigger. 
In such cases, we cannot + get access the parameter correctly and the register assignment of + subsequent parameters will be shifted. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, + unsigned int n) +{ +#define NR_REG_ARGUMENTS 8 + if (n < NR_REG_ARGUMENTS) + return pt_regs_read_reg(regs, n); + return 0; +} + /* We must avoid circular header include via sched.h */ struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -- GitLab From b575f10dbd6f84c2c8744ff1f486bfae1e4f6f38 Mon Sep 17 00:00:00 2001 From: wentalou Date: Fri, 12 Apr 2019 15:01:14 +0800 Subject: [PATCH 1027/1861] drm/amdgpu: shadow in shadow_list without tbo.mem.start causes page fault in sriov TDR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shadow BO is added to shadow_list by amdgpu_bo_create_shadow; at that point, shadow->tbo.mem is not yet fully configured. tbo.mem is only fully configured by amdgpu_vm_sdma_map_table, when amdgpu_vm_clear_bo is called. If sriov TDR occurred between amdgpu_bo_create_shadow and amdgpu_vm_sdma_map_table, amdgpu_device_recover_vram would deal with a shadow without tbo.mem.start. Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5d8b30fd45345..79fb302fb9543 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3165,6 +3165,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) /* No need to recover an evicted BO */ if (shadow->tbo.mem.mem_type != TTM_PL_TT || + shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET || shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) continue; -- GitLab From 1925e7d3d4677e681cc2e878c2bdbeaee988c8e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Apr 2019 14:54:40 -0500 Subject: [PATCH 1028/1861] drm/amdgpu/gmc9: fix VM_L2_CNTL3 programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Got accidentally dropped when 2+1 level support was added. Fixes: 6a42fd6fbf534096 ("drm/amdgpu: implement 2+1 PD support for Raven v3") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index d0d966d6080a6..1696644ec0223 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -182,6 +182,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); } + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); -- GitLab From b3cf181c65c4d49f86b67b399fe7203ecac730a9 Mon Sep 17 00:00:00 2001 From: Weiyi Lu Date: Fri, 12 Apr 2019 11:30:27 +0800 Subject: [PATCH 1029/1861] clk: mediatek: fix clk-gate flag setting CLK_SET_RATE_PARENT would be dropped. Merge the two flag assignments to correct the error.
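To spell the one-line bug out, the pattern is a plain clobbering assignment; the following sketch (names trimmed, not the driver code itself) shows the shape of the mistake and of the fix.

        struct clk_init_data init;

        init.flags = CLK_SET_RATE_PARENT;   /* baseline flag, set first */
        /* ... other init fields filled in ... */
        init.flags = flags;                 /* bug: clobbers the baseline */

        /* fix: merge both contributions in one assignment */
        init.flags = flags | CLK_SET_RATE_PARENT;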
Fixes: 5a1cc4c27ad2 ("clk: mediatek: Add flags to mtk_gate") Cc: Signed-off-by: Weiyi Lu Reviewed-by: Matthias Brugger Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-gate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c index 9628d4e7690bb..85daf826619ab 100644 --- a/drivers/clk/mediatek/clk-gate.c +++ b/drivers/clk/mediatek/clk-gate.c @@ -169,11 +169,10 @@ struct clk *mtk_clk_register_gate( return ERR_PTR(-ENOMEM); init.name = name; - init.flags = CLK_SET_RATE_PARENT; + init.flags = flags | CLK_SET_RATE_PARENT; init.parent_names = parent_name ? &parent_name : NULL; init.num_parents = parent_name ? 1 : 0; init.ops = ops; - init.flags = flags; cg->regmap = regmap; cg->set_ofs = set_ofs; -- GitLab From dd3ac9a684358b8c1d5c432ca8322aaf5e4f28ee Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:51:52 +0900 Subject: [PATCH 1030/1861] net/rds: Check address length before reading address family syzbot is reporting uninitialized value at rds_connect() [1] and rds_bind() [2]. This is because syzbot is passing ulen == 0 whereas these functions expect that it is safe to access sockaddr->family field in order to determine minimal address length for validation. [1] https://syzkaller.appspot.com/bug?id=f4e61c010416c1e6f0fa3ffe247561b60a50ad71 [2] https://syzkaller.appspot.com/bug?id=a4bf9e41b7e055c3823fdcd83e8c58ca7270e38f Reported-by: syzbot Reported-by: syzbot Signed-off-by: Tetsuo Handa Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/af_rds.c | 3 +++ net/rds/bind.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index d6cc97fbbbb02..2b969f99ef131 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -543,6 +543,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; + lock_sock(sk); switch (uaddr->sa_family) { diff --git a/net/rds/bind.c b/net/rds/bind.c index 17c9d9f0c8483..0f4398e7f2a7a 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* We allow an RDS socket to be bound to either IPv4 or IPv6 * address. */ + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; if (uaddr->sa_family == AF_INET) { struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; -- GitLab From 238ffdc49ef98b15819cfd5e3fb23194e3ea3d39 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:52:36 +0900 Subject: [PATCH 1031/1861] mISDN: Check address length before reading address family KMSAN will complain if valid address length passed to bind() is shorter than sizeof("struct sockaddr_mISDN"->family) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. 
Miller --- drivers/isdn/mISDN/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 4ab8b1b6608f7..a14e35d405387 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -710,10 +710,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; int err = 0; - if (!maddr || maddr->family != AF_ISDN) + if (addr_len < sizeof(struct sockaddr_mISDN)) return -EINVAL; - if (addr_len < sizeof(struct sockaddr_mISDN)) + if (!maddr || maddr->family != AF_ISDN) return -EINVAL; lock_sock(sk); -- GitLab From 175f7c1f01d30b2088491bee4636fbf846fb76ce Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:53:10 +0900 Subject: [PATCH 1032/1861] sctp: Check address length before reading address family KMSAN will complain if valid address length passed to connect() is shorter than sizeof("struct sockaddr"->sa_family) bytes. Signed-off-by: Tetsuo Handa Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9874e60c9b0d0..4583fa914e62a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4847,7 +4847,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* Validate addr_len before calling common connect/connectx routine. */ - af = sctp_get_af_specific(addr->sa_family); + af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL : + sctp_get_af_specific(addr->sa_family); if (!af || addr_len < af->sockaddr_len) { err = -EINVAL; } else { -- GitLab From d852be84770c0611f8b76bd7046c6a814c5b9f11 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:53:38 +0900 Subject: [PATCH 1033/1861] net: netlink: Check address length before reading groups field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_nl) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f28e937320a3b..216ab915dd54d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -988,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err = 0; - unsigned long groups = nladdr->nl_groups; + unsigned long groups; bool bound; if (addr_len < sizeof(struct sockaddr_nl)) @@ -996,6 +996,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_family != AF_NETLINK) return -EINVAL; + groups = nladdr->nl_groups; /* Only superuser is allowed to listen multicasts */ if (groups) { -- GitLab From a9107a14a9b9112775459ad291fc5de0f2513ce0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:54:05 +0900 Subject: [PATCH 1034/1861] rxrpc: Check address length before reading srx_service field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_rxrpc) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. 
Miller --- net/rxrpc/af_rxrpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 96f2952bbdfd6..c54dce3ca0dd0 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -135,7 +135,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct rxrpc_local *local; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - u16 service_id = srx->srx_service; + u16 service_id; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -143,6 +143,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) ret = rxrpc_validate_address(rx, srx, len); if (ret < 0) goto error; + service_id = srx->srx_service; lock_sock(&rx->sk); -- GitLab From bd7d46ddca06f1fadd68ceb99bc6e6f808ab50f2 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:54:33 +0900 Subject: [PATCH 1035/1861] Bluetooth: Check address length before reading address field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_sco) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/bluetooth/sco.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9a580999ca57e..d892b7c3cc42a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -523,12 +523,12 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, struct sock *sk = sock->sk; int err = 0; - BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); - if (!addr || addr_len < sizeof(struct sockaddr_sco) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { -- GitLab From c68e747d0a98f44a4e49071940a692fa83630e47 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:55:14 +0900 Subject: [PATCH 1036/1861] llc: Check address length before reading address field KMSAN will complain if valid address length passed to bind() is shorter than sizeof(struct sockaddr_llc) bytes. Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b99e73a7e7e0f..2017b7d780f5a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -320,14 +320,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct llc_sap *sap; int rc = -EINVAL; - dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); - lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; if (unlikely(addr->sllc_family != AF_LLC)) goto out; + dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { -- GitLab From ba024f2574a19557f92116ec6be129b26ae66e97 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:55:47 +0900 Subject: [PATCH 1037/1861] bpf: Check address length before reading address family KMSAN will complain if valid address length passed to bpf_bind() is shorter than sizeof("struct sockaddr"->sa_family) bytes. Signed-off-by: Tetsuo Handa Acked-by: Andrey Ignatov Signed-off-by: David S. 
Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index fc92ebc4e200c..27e61ffd90393 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4383,6 +4383,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, * Only binding to IP is supported. */ err = -EINVAL; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return err; if (addr->sa_family == AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) return err; -- GitLab From bddc028a4f2ac8cf4d0cd1c696b5f95d8305a553 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 12 Apr 2019 19:56:39 +0900 Subject: [PATCH 1038/1861] udpv6: Check address length before reading address family KMSAN will complain if valid address length passed to udpv6_pre_connect() is shorter than sizeof("struct sockaddr"->sa_family) bytes. (This patch is bogus if it is guaranteed that udpv6_pre_connect() is always called after checking "struct sockaddr"->sa_family. In that case, we want a comment why we don't need to check valid address length here.) Signed-off-by: Tetsuo Handa Acked-by: Song Liu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b444483cdb2b4..622eeaf5732b3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1047,6 +1047,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk) static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + if (addr_len < offsetofend(struct sockaddr, sa_family)) + return -EINVAL; /* The following checks are replicated from __ip6_datagram_connect() * and intended to prevent BPF program called below from accessing * bytes that are out of the bound specified by user in addr_len. -- GitLab From 2170e2157d7c5398f84477935553d63a93a1f6b8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:19 +0100 Subject: [PATCH 1039/1861] mt76: mt7603: add missing initialization for dev->ps_lock Fixes lockdep complaint and a potential race condition Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c index d54dda67d036c..3af45949e8689 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c @@ -510,6 +510,8 @@ int mt7603_register_device(struct mt7603_dev *dev) bus_ops->rmw = mt7603_rmw; dev->mt76.bus = bus_ops; + spin_lock_init(&dev->ps_lock); + INIT_DELAYED_WORK(&dev->mac_work, mt7603_mac_work); tasklet_init(&dev->pre_tbtt_tasklet, mt7603_pre_tbtt_tasklet, (unsigned long)dev); -- GitLab From aa3cb24be18b9b537750c354c5cff96c3d17ae44 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:20 +0100 Subject: [PATCH 1040/1861] mt76: mt7603: fix sequence number assignment If the MT_TXD3_SN_VALID flag is not set in the tx descriptor, the hardware assigns the sequence number. However, the rest of the code assumes that the sequence number specified in the 802.11 header gets transmitted. This was causing issues with the aggregation setup, which worked for the initial one (where the sequence numbers were still close), but not for further teardown/re-establishing of sessions. 
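(For context on why the drift matters: the receiver only honours frames whose SN falls inside the BlockAck reorder window. The membership test looks roughly like the sketch below, an illustration built on the 12-bit SN helpers from <linux/ieee80211.h> rather than code from this driver.)

#include <linux/ieee80211.h>

/* Illustration only: receiver-side reorder-window membership test. */
static bool sn_in_reorder_window(u16 sn, u16 win_start, u16 buf_size)
{
        /* SNs wrap modulo 4096; frames outside [win_start, win_start +
         * buf_size) are dropped or force the window to move, which is
         * what happens once hardware-assigned SNs drift away from the
         * software-tracked ones. */
        return !ieee80211_sn_less(sn, win_start) &&
               ieee80211_sn_less(sn, ieee80211_sn_add(win_start, buf_size));
}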
Additionally, the overwrite of the TID sequence number in WTBL2 was resetting the hardware assigned sequence numbers, causing them to drift further apart. Fix this by using the software assigned sequence numbers Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- .../net/wireless/mediatek/mt76/mt7603/mac.c | 53 +++++-------------- .../net/wireless/mediatek/mt76/mt7603/main.c | 6 +-- .../wireless/mediatek/mt76/mt7603/mt7603.h | 2 +- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 5e31d7da96fc8..5abc02b578185 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -343,7 +343,7 @@ void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid) MT_BA_CONTROL_1_RESET)); } -void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, +void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ba_size) { u32 addr = mt7603_wtbl2_addr(wcid); @@ -358,43 +358,6 @@ void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, mt76_clear(dev, addr + (15 * 4), tid_mask); return; } - mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000); - - mt7603_mac_stop(dev); - switch (tid) { - case 0: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID0_SN, ssn); - break; - case 1: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID1_SN, ssn); - break; - case 2: - mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID2_SN_LO, - ssn); - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID2_SN_HI, - ssn >> 8); - break; - case 3: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID3_SN, ssn); - break; - case 4: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID4_SN, ssn); - break; - case 5: - mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID5_SN_LO, - ssn); - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID5_SN_HI, - ssn >> 4); - break; - case 6: - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID6_SN, ssn); - break; - case 7: - mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID7_SN, ssn); - break; - } - mt7603_wtbl_update(dev, wcid, MT_WTBL_UPDATE_WTBL2); - mt7603_mac_start(dev); for (i = 7; i > 0; i--) { if (ba_size >= MT_AGG_SIZE_LIMIT(i)) @@ -827,6 +790,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_tx_rate *rate = &info->control.rates[0]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data; struct ieee80211_vif *vif = info->control.vif; struct mt7603_vif *mvif; int wlan_idx; @@ -834,6 +798,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, int tx_count = 8; u8 frame_type, frame_subtype; u16 fc = le16_to_cpu(hdr->frame_control); + u16 seqno = 0; u8 vif_idx = 0; u32 val; u8 bw; @@ -919,7 +884,17 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi, tx_count = 0x1f; val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count) | - FIELD_PREP(MT_TXD3_SEQ, le16_to_cpu(hdr->seq_ctrl)); + MT_TXD3_SN_VALID; + + if (ieee80211_is_data_qos(hdr->frame_control)) + seqno = le16_to_cpu(hdr->seq_ctrl); + else if (ieee80211_is_back_req(hdr->frame_control)) + seqno = le16_to_cpu(bar->start_seq_num); + else + val &= ~MT_TXD3_SN_VALID; + + val |= FIELD_PREP(MT_TXD3_SEQ, seqno >> 4); + txwi[3] = cpu_to_le32(val); if (key) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c 
b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index cc0fe0933b2d8..31a7ca6911956 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -584,13 +584,13 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, case IEEE80211_AMPDU_TX_OPERATIONAL: mtxq->aggr = true; mtxq->send_bar = false; - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, ba_size); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, ba_size); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: mtxq->aggr = false; ieee80211_send_bar(vif, sta->addr, tid, mtxq->agg_ssn); - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); break; case IEEE80211_AMPDU_TX_START: mtxq->agg_ssn = *ssn << 4; @@ -598,7 +598,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, break; case IEEE80211_AMPDU_TX_STOP_CONT: mtxq->aggr = false; - mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1); + mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h index 79f3324294328..6049f3b7c8fec 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h @@ -200,7 +200,7 @@ void mt7603_beacon_set_timer(struct mt7603_dev *dev, int idx, int intval); int mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb); void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data); void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid); -void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn, +void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ba_size); void mt7603_pse_client_reset(struct mt7603_dev *dev); -- GitLab From 9dc27bcbe78c5d3926f48b1105840f349c827766 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Mar 2019 09:34:21 +0100 Subject: [PATCH 1041/1861] mt76: mt7603: send BAR after powersave wakeup Now that the sequence number allocation is fixed, we can finally send a BAR at powersave wakeup time to refresh the receiver side reorder window Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 31a7ca6911956..a3c4ef198bfee 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -372,7 +372,7 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps) struct mt7603_sta *msta = (struct mt7603_sta *)sta->drv_priv; struct sk_buff_head list; - mt76_stop_tx_queues(&dev->mt76, sta, false); + mt76_stop_tx_queues(&dev->mt76, sta, true); mt7603_wtbl_set_ps(dev, msta, ps); if (ps) return; -- GitLab From 746ba11f170603bf1eaade817553a6c2e9135bbe Mon Sep 17 00:00:00 2001 From: Vijayakumar Durai Date: Wed, 27 Mar 2019 11:03:17 +0100 Subject: [PATCH 1042/1861] rt2x00: do not increment sequence number while re-transmitting Currently rt2x00 devices retransmit the management frames with incremented sequence number if hardware is assigning the sequence. 
This is HW bug fixed already for non-QOS data frames, but it should be fixed for management frames except beacon. Without fix retransmitted frames have wrong SN: AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1648, FN=0, Flags=........C Frame is not being retransmitted 1648 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1649, FN=0, Flags=....R...C Frame is being retransmitted 1649 1 AlphaNet_e8:fb:36 Vivotek_52:31:51 Authentication, SN=1650, FN=0, Flags=....R...C Frame is being retransmitted 1650 1 With the fix SN stays correctly the same: 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=........C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C 88:6a:e3:e8:f9:a2 8c:f5:a3:88:76:87 Authentication, SN=1450, FN=0, Flags=....R...C Cc: stable@vger.kernel.org Signed-off-by: Vijayakumar Durai [sgruszka: simplify code, change comments and changelog] Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 1 - drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 10 ---------- drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 15 +++++++++------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 4b1744e9fb78a..50b92ca92bd75 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -673,7 +673,6 @@ enum rt2x00_state_flags { CONFIG_CHANNEL_HT40, CONFIG_POWERSAVING, CONFIG_HT_DISABLED, - CONFIG_QOS_DISABLED, CONFIG_MONITORING, /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 2825560e2424d..e8462f25d2522 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -642,18 +642,8 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, rt2x00dev->intf_associated--; rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated); - - clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); } - /* - * Check for access point which do not support 802.11e . We have to - * generate data frames sequence number in S/W for such AP, because - * of H/W bug. - */ - if (changes & BSS_CHANGED_QOS && !bss_conf->qos) - set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); - /* * When the erp information has changed, we should perform * additional configuration steps. For all other changes we are done. diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index 92ddc19e7bf74..4834b4eb02064 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -201,15 +201,18 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, if (!rt2x00_has_cap_flag(rt2x00dev, REQUIRE_SW_SEQNO)) { /* * rt2800 has a H/W (or F/W) bug, device incorrectly increase - * seqno on retransmited data (non-QOS) frames. To workaround - * the problem let's generate seqno in software if QOS is - * disabled. + * seqno on retransmitted data (non-QOS) and management frames. + * To workaround the problem let's generate seqno in software. + * Except for beacons which are transmitted periodically by H/W + * hence hardware has to assign seqno for them. 
*/ - if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags)) - __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); - else + if (ieee80211_is_beacon(hdr->frame_control)) { + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); /* H/W will generate sequence number */ return; + } + + __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); } /* -- GitLab From bafdf85dfa59374f927ff597bc8c259193afda30 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 5 Apr 2019 13:42:56 +0200 Subject: [PATCH 1043/1861] mt76x02: avoid status_list.lock and sta->rate_ctrl_lock dependency Move ieee80211_tx_status_ext() outside of status_list lock section in order to avoid locking dependency and possible deadlock reposed by LOCKDEP in below warning. Also do mt76_tx_status_lock() just before it's needed. [ 440.224832] WARNING: possible circular locking dependency detected [ 440.224833] 5.1.0-rc2+ #22 Not tainted [ 440.224834] ------------------------------------------------------ [ 440.224835] kworker/u16:28/2362 is trying to acquire lock: [ 440.224836] 0000000089b8cacf (&(&q->lock)->rlock#2){+.-.}, at: mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.224842] but task is already holding lock: [ 440.224842] 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.224863] which lock already depends on the new lock. [ 440.224863] the existing dependency chain (in reverse order) is: [ 440.224864] -> #3 (&(&sta->lock)->rlock){+.-.}: [ 440.224869] _raw_spin_lock_bh+0x34/0x40 [ 440.224880] ieee80211_start_tx_ba_session+0xe4/0x3d0 [mac80211] [ 440.224894] minstrel_ht_get_rate+0x45c/0x510 [mac80211] [ 440.224906] rate_control_get_rate+0xc1/0x140 [mac80211] [ 440.224918] ieee80211_tx_h_rate_ctrl+0x195/0x3c0 [mac80211] [ 440.224930] ieee80211_xmit_fast+0x26d/0xa50 [mac80211] [ 440.224942] __ieee80211_subif_start_xmit+0xfc/0x310 [mac80211] [ 440.224954] ieee80211_subif_start_xmit+0x38/0x390 [mac80211] [ 440.224956] dev_hard_start_xmit+0xb8/0x300 [ 440.224957] __dev_queue_xmit+0x7d4/0xbb0 [ 440.224968] ip6_finish_output2+0x246/0x860 [ipv6] [ 440.224978] mld_sendpack+0x1bd/0x360 [ipv6] [ 440.224987] mld_ifc_timer_expire+0x1a4/0x2f0 [ipv6] [ 440.224989] call_timer_fn+0x89/0x2a0 [ 440.224990] run_timer_softirq+0x1bd/0x4d0 [ 440.224992] __do_softirq+0xdb/0x47c [ 440.224994] irq_exit+0xfa/0x100 [ 440.224996] smp_apic_timer_interrupt+0x9a/0x220 [ 440.224997] apic_timer_interrupt+0xf/0x20 [ 440.224999] cpuidle_enter_state+0xc1/0x470 [ 440.225000] do_idle+0x21a/0x260 [ 440.225001] cpu_startup_entry+0x19/0x20 [ 440.225004] start_secondary+0x135/0x170 [ 440.225006] secondary_startup_64+0xa4/0xb0 [ 440.225007] -> #2 (&(&sta->rate_ctrl_lock)->rlock){+.-.}: [ 440.225009] _raw_spin_lock_bh+0x34/0x40 [ 440.225022] rate_control_tx_status+0x4f/0xb0 [mac80211] [ 440.225031] ieee80211_tx_status_ext+0x142/0x1a0 [mac80211] [ 440.225035] mt76x02_send_tx_status+0x2e4/0x340 [mt76x02_lib] [ 440.225037] mt76x02_tx_status_data+0x31/0x40 [mt76x02_lib] [ 440.225040] mt76u_tx_status_data+0x51/0xa0 [mt76_usb] [ 440.225042] process_one_work+0x237/0x5d0 [ 440.225043] worker_thread+0x3c/0x390 [ 440.225045] kthread+0x11d/0x140 [ 440.225046] ret_from_fork+0x3a/0x50 [ 440.225047] -> #1 (&(&list->lock)->rlock#8){+.-.}: [ 440.225049] _raw_spin_lock_bh+0x34/0x40 [ 440.225052] mt76_tx_status_skb_add+0x51/0x100 [mt76] [ 440.225054] mt76x02u_tx_prepare_skb+0xbd/0x116 [mt76x02_usb] [ 440.225056] mt76u_tx_queue_skb+0x5f/0x180 [mt76_usb] [ 440.225058] mt76_tx+0x93/0x190 [mt76] [ 440.225070] ieee80211_tx_frags+0x148/0x210 [mac80211] [ 
440.225081] __ieee80211_tx+0x75/0x1b0 [mac80211] [ 440.225092] ieee80211_tx+0xde/0x110 [mac80211] [ 440.225105] __ieee80211_tx_skb_tid_band+0x72/0x90 [mac80211] [ 440.225122] ieee80211_send_auth+0x1f3/0x360 [mac80211] [ 440.225141] ieee80211_auth.cold.40+0x6c/0x100 [mac80211] [ 440.225156] ieee80211_mgd_auth.cold.50+0x132/0x15f [mac80211] [ 440.225171] cfg80211_mlme_auth+0x149/0x360 [cfg80211] [ 440.225181] nl80211_authenticate+0x273/0x2e0 [cfg80211] [ 440.225183] genl_family_rcv_msg+0x196/0x3a0 [ 440.225184] genl_rcv_msg+0x47/0x8e [ 440.225185] netlink_rcv_skb+0x3a/0xf0 [ 440.225187] genl_rcv+0x24/0x40 [ 440.225188] netlink_unicast+0x16d/0x210 [ 440.225189] netlink_sendmsg+0x204/0x3b0 [ 440.225191] sock_sendmsg+0x36/0x40 [ 440.225193] ___sys_sendmsg+0x259/0x2b0 [ 440.225194] __sys_sendmsg+0x47/0x80 [ 440.225196] do_syscall_64+0x60/0x1f0 [ 440.225197] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 440.225198] -> #0 (&(&q->lock)->rlock#2){+.-.}: [ 440.225200] lock_acquire+0xb9/0x1a0 [ 440.225202] _raw_spin_lock_bh+0x34/0x40 [ 440.225204] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225215] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225225] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225235] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225236] process_one_work+0x237/0x5d0 [ 440.225237] worker_thread+0x3c/0x390 [ 440.225239] kthread+0x11d/0x140 [ 440.225240] ret_from_fork+0x3a/0x50 [ 440.225240] other info that might help us debug this: [ 440.225241] Chain exists of: &(&q->lock)->rlock#2 --> &(&sta->rate_ctrl_lock)->rlock --> &(&sta->lock)->rlock [ 440.225243] Possible unsafe locking scenario: [ 440.225244] CPU0 CPU1 [ 440.225244] ---- ---- [ 440.225245] lock(&(&sta->lock)->rlock); [ 440.225245] lock(&(&sta->rate_ctrl_lock)->rlock); [ 440.225246] lock(&(&sta->lock)->rlock); [ 440.225247] lock(&(&q->lock)->rlock#2); [ 440.225248] *** DEADLOCK *** [ 440.225249] 5 locks held by kworker/u16:28/2362: [ 440.225250] #0: 0000000048fcd291 ((wq_completion)phy0){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225252] #1: 00000000f1c6828f ((work_completion)(&sta->ampdu_mlme.work)){+.+.}, at: process_one_work+0x1b5/0x5d0 [ 440.225254] #2: 00000000433d2b2c (&sta->ampdu_mlme.mtx){+.+.}, at: ieee80211_ba_session_work+0x5c/0x2f0 [mac80211] [ 440.225265] #3: 000000002cfedc59 (&(&sta->lock)->rlock){+.-.}, at: ieee80211_stop_tx_ba_cb+0x32/0x1f0 [mac80211] [ 440.225276] #4: 000000009d7b9a44 (rcu_read_lock){....}, at: ieee80211_agg_start_txq+0x33/0x2b0 [mac80211] [ 440.225286] stack backtrace: [ 440.225288] CPU: 2 PID: 2362 Comm: kworker/u16:28 Not tainted 5.1.0-rc2+ #22 [ 440.225289] Hardware name: LENOVO 20KGS23S0P/20KGS23S0P, BIOS N23ET55W (1.30 ) 08/31/2018 [ 440.225300] Workqueue: phy0 ieee80211_ba_session_work [mac80211] [ 440.225301] Call Trace: [ 440.225304] dump_stack+0x85/0xc0 [ 440.225306] print_circular_bug.isra.38.cold.58+0x15c/0x195 [ 440.225307] check_prev_add.constprop.48+0x5f0/0xc00 [ 440.225309] ? check_prev_add.constprop.48+0x39d/0xc00 [ 440.225311] ? __lock_acquire+0x41d/0x1100 [ 440.225312] __lock_acquire+0xd98/0x1100 [ 440.225313] ? __lock_acquire+0x41d/0x1100 [ 440.225315] lock_acquire+0xb9/0x1a0 [ 440.225317] ? mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225319] _raw_spin_lock_bh+0x34/0x40 [ 440.225321] ? 
mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225323] mt76_wake_tx_queue+0x4c/0xb0 [mt76] [ 440.225334] ieee80211_agg_start_txq+0xe8/0x2b0 [mac80211] [ 440.225344] ieee80211_stop_tx_ba_cb+0xb8/0x1f0 [mac80211] [ 440.225354] ieee80211_ba_session_work+0x1c1/0x2f0 [mac80211] [ 440.225356] process_one_work+0x237/0x5d0 [ 440.225358] worker_thread+0x3c/0x390 [ 440.225359] ? wq_calc_node_cpumask+0x70/0x70 [ 440.225360] kthread+0x11d/0x140 [ 440.225362] ? kthread_create_on_node+0x40/0x40 [ 440.225363] ret_from_fork+0x3a/0x50 Cc: stable@vger.kernel.org Fixes: 88046b2c9f6d ("mt76: add support for reporting tx status with skb") Signed-off-by: Stanislaw Gruszka Acked-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 9ed231abe9167..4fe5a83ca5a41 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -466,7 +466,6 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, return; rcu_read_lock(); - mt76_tx_status_lock(mdev, &list); if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid)) wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); @@ -479,6 +478,8 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, drv_priv); } + mt76_tx_status_lock(mdev, &list); + if (wcid) { if (stat->pktid >= MT_PACKET_ID_FIRST) status.skb = mt76_tx_status_skb_get(mdev, wcid, @@ -498,7 +499,9 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (*update == 0 && stat_val == stat_cache && stat->wcid == msta->status.wcid && msta->n_frames < 32) { msta->n_frames++; - goto out; + mt76_tx_status_unlock(mdev, &list); + rcu_read_unlock(); + return; } mt76x02_mac_fill_tx_status(dev, status.info, &msta->status, @@ -514,11 +517,10 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, if (status.skb) mt76_tx_status_skb_done(mdev, status.skb, &list); - else - ieee80211_tx_status_ext(mt76_hw(dev), &status); - -out: mt76_tx_status_unlock(mdev, &list); + + if (!status.skb) + ieee80211_tx_status_ext(mt76_hw(dev), &status); rcu_read_unlock(); } -- GitLab From d5bc73f34cc97c4b4b9202cc93182c2515076edf Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 10 Apr 2019 15:05:31 -0600 Subject: [PATCH 1044/1861] PCI: Fix issue with "pci=disable_acs_redir" parameter being ignored In most cases, kmalloc() will not be available early in boot when pci_setup() is called. Thus, the kstrdup() call that was added to fix the __initdata bug with the disable_acs_redir parameter usually returns NULL, so the parameter is discarded and has no effect. To fix this, store the string that's in initdata until an initcall function can allocate the memory appropriately. This way we don't need any additional static memory. 
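As a general illustration of the pattern being applied here (a minimal sketch, not the actual drivers/pci/pci.c code; "my_opt" and my_param are made-up names), an early_param() handler can stash the pointer to the __initdata string, and a later initcall can duplicate it once the allocator is available:

    static char *my_param; /* may point into __initdata early in boot */

    static int __init my_setup(char *str)
    {
            /* Too early for kmalloc() on some architectures; just
             * remember the pointer, even though it lives in initdata. */
            my_param = str;
            return 0;
    }
    early_param("my_opt", my_setup);

    static int __init my_realloc_params(void)
    {
            /* Runs once allocators are up but before the init sections
             * are freed; kstrdup(NULL, GFP_KERNEL) safely returns NULL
             * if the option was never given. */
            my_param = kstrdup(my_param, GFP_KERNEL);
            return 0;
    }
    pure_initcall(my_realloc_params);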
Fixes: d2fd6e81912a ("PCI: Fix __initdata issue with "pci=disable_acs_redir" parameter") Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7c1b362f599ae..766f5779db929 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6262,8 +6262,7 @@ static int __init pci_setup(char *str) } else if (!strncmp(str, "pcie_scan_all", 13)) { pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); } else if (!strncmp(str, "disable_acs_redir=", 18)) { - disable_acs_redir_param = - kstrdup(str + 18, GFP_KERNEL); + disable_acs_redir_param = str + 18; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); @@ -6274,3 +6273,19 @@ static int __init pci_setup(char *str) return 0; } early_param("pci", pci_setup); + +/* + * 'disable_acs_redir_param' is initialized in pci_setup(), above, to point + * to data in the __initdata section which will be freed after the init + * sequence is complete. We can't allocate memory in pci_setup() because some + * architectures do not have any memory allocation service available during + * an early_param() call. So we allocate memory and copy the variable here + * before the init section is freed. + */ +static int __init pci_realloc_setup_params(void) +{ + disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL); + + return 0; +} +pure_initcall(pci_realloc_setup_params); -- GitLab From f89b9e1be7da8bb0aac667a0206a00975cefe6d3 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 12 Apr 2019 14:10:03 +0000 Subject: [PATCH 1045/1861] clk: imx: Fix PLL_1416X not rounding rates Code which initializes the "clk_init_data.ops" checks pll->rate_table before that field is ever assigned to so it always picks "clk_pll1416x_min_ops". This breaks dynamic rate rounding for features such as cpufreq. Fix by checking pll_clk->rate_table instead, here pll_clk refers to the constant initialization data coming from per-soc clk driver. Signed-off-by: Leonard Crestez Fixes: 8646d4dcc7fb ("clk: imx: Add PLLs driver for imx8mm soc") Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-pll14xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-pll14xx.c b/drivers/clk/imx/clk-pll14xx.c index 1acfa3e3cfb40..113d71042199b 100644 --- a/drivers/clk/imx/clk-pll14xx.c +++ b/drivers/clk/imx/clk-pll14xx.c @@ -362,7 +362,7 @@ struct clk *imx_clk_pll14xx(const char *name, const char *parent_name, switch (pll_clk->type) { case PLL_1416X: - if (!pll->rate_table) + if (!pll_clk->rate_table) init.ops = &clk_pll1416x_min_ops; else init.ops = &clk_pll1416x_ops; -- GitLab From 0a2c34f18c94b596562bf3d019fceab998b8b584 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 14:45:12 +0100 Subject: [PATCH 1046/1861] vxge: fix return of a free'd memblock on a failed dma mapping Currently if a pci dma mapping failure is detected a free'd memblock address is returned rather than a NULL (that indicates an error). Fix this by ensuring NULL is returned on this error case. Addresses-Coverity: ("Use after free") Fixes: 528f727279ae ("vxge: code cleanup and reorganization") Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/neterion/vxge/vxge-config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 7cde387e5ec62..51cd57ab3d958 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -2366,6 +2366,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size, dma_object->addr))) { vxge_os_dma_free(devh->pdev, memblock, &dma_object->acc_handle); + memblock = NULL; goto exit; } -- GitLab From 1dc2b3d65523780ed1972d446c76e62e13f3e8f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 15:13:27 +0100 Subject: [PATCH 1047/1861] qede: fix write to free'd pointer error and double free of ptp The err2 error return path calls qede_ptp_disable that cleans up on an error and frees ptp. After this, the free'd ptp is dereferenced when ptp->clock is set to NULL and the code falls-through to error path err1 that frees ptp again. Fix this by calling qede_ptp_disable and exiting via an error return path that does not set ptp->clock or kfree ptp. Addresses-Coverity: ("Write to pointer after free") Fixes: 035744975aec ("qede: Add support for PTP resource locking.") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 5f3f42a253616..bddb2b5982dcf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -490,18 +490,17 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc) ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); if (IS_ERR(ptp->clock)) { - rc = -EINVAL; DP_ERR(edev, "PTP clock registration failed\n"); + qede_ptp_disable(edev); + rc = -EINVAL; goto err2; } return 0; -err2: - qede_ptp_disable(edev); - ptp->clock = NULL; err1: kfree(ptp); +err2: edev->ptp = NULL; return rc; -- GitLab From 56d282d9db19f85f759b7a81f0829b58c00571b0 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:40 +0100 Subject: [PATCH 1048/1861] rxrpc: Clear socket error When an ICMP or ICMPV6 error is received, the error will be attached to the socket (sk_err) and the report function will get called. Clear any pending error here by calling sock_error(). This would cause the following attempt to use the socket to fail with the error code stored by the ICMP error, resulting in unexpected errors with various side effects depending on the context. Signed-off-by: Marc Dionne Signed-off-by: David Howells Tested-by: Jonathan Billings Signed-off-by: David S. Miller --- net/rxrpc/peer_event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bc05af89fc381..6e84d878053c7 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -157,6 +157,11 @@ void rxrpc_error_report(struct sock *sk) _enter("%p{%d}", sk, local->debug_id); + /* Clear the outstanding error value on the socket so that it doesn't + * cause kernel_sendmsg() to return it later. 
+ */ + sock_error(sk); + skb = sock_dequeue_err_skb(sk); if (!skb) { _leave("UDP socket errqueue empty"); -- GitLab From 4611da30d679a4b0a2c2b5d4d7b3fbbafc922df7 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:47 +0100 Subject: [PATCH 1049/1861] rxrpc: Make rxrpc_kernel_check_life() indicate if call completed Make rxrpc_kernel_check_life() pass back the life counter through the argument list and return true if the call has not yet completed. Suggested-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 16 +++++++++------- fs/afs/rxrpc.c | 4 ++-- include/net/af_rxrpc.h | 4 +++- net/rxrpc/af_rxrpc.c | 14 +++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 2df5894353d69..cd7303d7fa25d 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1009,16 +1009,18 @@ The kernel interface functions are as follows: (*) Check call still alive. - u32 rxrpc_kernel_check_life(struct socket *sock, - struct rxrpc_call *call); + bool rxrpc_kernel_check_life(struct socket *sock, + struct rxrpc_call *call, + u32 *_life); void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call); - The first function returns a number that is updated when ACKs are received - from the peer (notably including PING RESPONSE ACKs which we can elicit by - sending PING ACKs to see if the call still exists on the server). The - caller should compare the numbers of two calls to see if the call is still - alive after waiting for a suitable interval. + The first function passes back in *_life a number that is updated when + ACKs are received from the peer (notably including PING RESPONSE ACKs + which we can elicit by sending PING ACKs to see if the call still exists + on the server). The caller should compare the numbers of two calls to see + if the call is still alive after waiting for a suitable interval. It also + returns true as long as the call hasn't yet reached the completed state. 
This allows the caller to work out if the server is still contactable and if the call is still alive on the server while waiting for the server to diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 2c588f9bbbda2..5cb11aff92980 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -621,7 +621,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, rtt2 = 2; timeout = rtt2; - last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); + rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life); add_wait_queue(&call->waitq, &myself); for (;;) { @@ -639,7 +639,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); + rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life); if (timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 2bfb87eb98ce1..78c856cba4f53 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -61,10 +61,12 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); +bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *, + u32 *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); +bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c54dce3ca0dd0..ae8c5d7f3bf1e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -371,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check + * @_life: Where to store the life value * * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Returns a number representing the life state - * which can be compared to that returned by a previous call. + * getting ACKs from the server. Passes back in *_life a number representing + * the life state which can be compared to that returned by a previous call and + * return true if the call is still alive. * * If the life state stalls, rxrpc_kernel_probe_life() should be called and * then 2RTT waited. */ -u32 rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call) +bool rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call, + u32 *_life) { - return call->acks_latest; + *_life = call->acks_latest; + return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); -- GitLab From 8e8715aaa905f6593f610f950d513e81fab5006a Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:33:54 +0100 Subject: [PATCH 1050/1861] rxrpc: Allow errors to be returned from rxrpc_queue_packet() Change rxrpc_queue_packet()'s signature so that it can return any error code it may encounter when trying to send the packet. 
This allows the caller to eventually do something in case of error - though it should be noted that the packet has been queued and a resend is scheduled. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/sendmsg.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 46c9312085b1b..bec64deb7b0a2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -152,12 +152,13 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, } /* - * Queue a DATA packet for transmission, set the resend timeout and send the - * packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send + * the packet immediately. Returns the error from rxrpc_send_data_packet() + * in case the caller wants to do something with it. */ -static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct sk_buff *skb, bool last, - rxrpc_notify_end_tx_t notify_end_tx) +static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long now; @@ -250,7 +251,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, out: rxrpc_free_skb(skb, rxrpc_skb_tx_freed); - _leave(""); + _leave(" = %d", ret); + return ret; } /* @@ -423,9 +425,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (ret < 0) goto out; - rxrpc_queue_packet(rx, call, skb, - !msg_data_left(msg) && !more, - notify_end_tx); + ret = rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); + /* Should check for failure here */ skb = NULL; } } while (msg_data_left(msg) > 0); -- GitLab From f7f1dd3162efc7ffdbcdb9da1ad1599f8ab51296 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 12 Apr 2019 16:34:02 +0100 Subject: [PATCH 1051/1861] afs: Check for rxrpc call completion in wait loop Check the state of the rxrpc call backing an afs call in each iteration of the call wait loop in case the rxrpc call has already been terminated at the rxrpc layer. Interrupt the wait loop and mark the afs call as complete if the rxrpc layer call is complete. There were cases where rxrpc errors were not passed up to afs, which could result in this loop waiting forever for an afs call to transition to AFS_CALL_COMPLETE while the rx call was already complete. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- fs/afs/rxrpc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5cb11aff92980..c14001b42d200 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -610,6 +610,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, bool stalled = false; u64 rtt; u32 life, last_life; + bool rxrpc_complete = false; DECLARE_WAITQUEUE(myself, current); @@ -639,7 +640,12 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life); + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) { + /* rxrpc terminated the call. 
*/ + rxrpc_complete = true; + break; + } + if (timeout == 0 && life == last_life && signal_pending(current)) { if (stalled) @@ -663,12 +669,16 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* Kill off the call if it's still live. */ if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) { - _debug("call interrupted"); - if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) - afs_set_call_complete(call, -EINTR, 0); + if (rxrpc_complete) { + afs_set_call_complete(call, call->error, call->abort_code); + } else { + /* Kill off the call if it's still live. */ + _debug("call interrupted"); + if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + RX_USER_ABORT, -EINTR, "KWI")) + afs_set_call_complete(call, -EINTR, 0); + } } spin_lock_bh(&call->state_lock); -- GitLab From 39ce67557568962fa9d1593741f76c4cc6762469 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 12 Apr 2019 16:34:09 +0100 Subject: [PATCH 1052/1861] rxrpc: Trace received connection aborts Trace received calls that are aborted due to a connection abort, typically because of authentication failure. Without this, connection aborts don't show up in the trace log. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/conn_event.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b6fca8ebb1173..8d31fb4c51e17 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -153,7 +153,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl) + enum rxrpc_call_completion compl, + rxrpc_serial_t serial) { struct rxrpc_call *call; int i; @@ -173,6 +174,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, call->call_id, 0, conn->abort_code, conn->error); + else + trace_rxrpc_rx_abort(call, serial, + conn->abort_code); if (rxrpc_set_call_completion(call, compl, conn->abort_code, conn->error)) @@ -213,8 +217,6 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->state = RXRPC_CONN_LOCALLY_ABORTED; spin_unlock_bh(&conn->state_lock); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED); - msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; @@ -242,6 +244,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = htonl(serial); _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); @@ -321,7 +324,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, conn->error = -ECONNABORTED; conn->abort_code = abort_code; conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: -- GitLab From 1a2391c30c0b9d041bc340f68df81d49c53546cc Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 12 Apr 2019 16:34:16 +0100 Subject: [PATCH 1053/1861] rxrpc: Fix detection of out of order acks The rxrpc packet serial number cannot be safely used to compute out of order ack 
packets for several reasons: 1. The allocation of serial numbers cannot be assumed to imply the order by which acks are populated and transmitted. In some rxrpc implementations, delayed acks and ping acks are transmitted asynchronously to the receipt of data packets and so may be transmitted out of order. As a result, they can race with idle acks. 2. Serial numbers are allocated by the rxrpc connection and not the call and as such may wrap independently if multiple channels are in use. In any case, what matters is whether the ack packet provides new information relating to the bounds of the window (the firstPacket and previousPacket in the ACK data). Fix this by discarding packets that appear to wind back the window bounds rather than on serial number procession. Fixes: 298bc15b2079 ("rxrpc: Only take the rwind and mtu values from latest ACK") Signed-off-by: Jeffrey Altman Signed-off-by: David Howells Tested-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4b1a534d290a7..062ca9dc29b8a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -654,6 +654,7 @@ struct rxrpc_call { u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9128aa0e40aac..4c6f9d0a00e79 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, u8 acks[RXRPC_MAXACKS]; } buf; rxrpc_serial_t acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -851,13 +851,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); + prev_pkt = ntohl(buf.ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = buf.ack.nAcks; summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? buf.ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial, - first_soft_ack, ntohl(buf.ack.previousPacket), + first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) @@ -878,8 +879,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (outside lock). */ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) return; buf.info.rxMTU = 0; @@ -890,12 +892,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, spin_lock(&call->input_lock); - /* Discard any out-of-order or duplicate ACKs. */ - if (before_eq(sp->hdr.serial, call->acks_latest)) + /* Discard any out-of-order or duplicate ACKs (inside lock). 
*/ + if (before(first_soft_ack, call->ackr_first_seq) || + before(prev_pkt, call->ackr_prev_seq)) goto out; call->acks_latest_ts = skb->tstamp; call->acks_latest = sp->hdr.serial; + call->ackr_first_seq = first_soft_ack; + call->ackr_prev_seq = prev_pkt; + /* Parse rwind and mtu sizes if provided. */ if (buf.info.rxMTU) rxrpc_input_ackinfo(call, skb, &buf.info); -- GitLab From ed0de45a1008991fdaa27a0152befcb74d126a8b Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Fri, 12 Apr 2019 16:19:27 -0400 Subject: [PATCH 1054/1861] ipv4: recompile ip options in ipv4_link_failure Recompile IP options since IPCB may not be valid anymore when ipv4_link_failure is called from arp_error_report. Refer to the commit 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") and the commit before that (9ef6b42ad6fd) for a similar issue. Signed-off-by: Stephen Suryaputra Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5da63e5faa2d..0206789bc2b73 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1186,8 +1186,16 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; + struct ip_options opt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + /* Recompile ip options since IPCB may not be valid anymore. + */ + memset(&opt, 0, sizeof(opt)); + opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + return; + + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); rt = skb_rtable(skb); if (rt) -- GitLab From ba25b81e3a420f8345585029d49ee32e73de9d5f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 13 Apr 2019 08:37:36 +0100 Subject: [PATCH 1055/1861] afs: avoid deprecated get_seconds() get_seconds() has a limited range on 32-bit architectures and is deprecated because of that. While AFS uses the same limits for its inode timestamps on the wire protocol, let's just use the simpler current_time() as we do for other file systems. This will still zero out the 'tv_nsec' field of the timestamps internally. Signed-off-by: Arnd Bergmann Signed-off-by: David Howells --- fs/afs/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1a4ce07fb406d..9cedc3fc1b774 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -216,9 +216,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) set_nlink(inode, 2); inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; - inode->i_ctime.tv_sec = get_seconds(); - inode->i_ctime.tv_nsec = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime; + inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode); inode->i_blocks = 0; inode_set_iversion_raw(inode, 0); inode->i_generation = 0; -- GitLab From d2abfa86ff373bd00634a656c7ad5531747f2bf8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 13 Apr 2019 08:37:36 +0100 Subject: [PATCH 1056/1861] afs: Avoid section confusion in CM_NAME __tracepoint_str cannot be const because the tracepoint_str section is not read-only. Remove the stray const. 
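The toolchain behaviour behind this can be reproduced outside the kernel; in this sketch ("demo_str" is a made-up section name, independent of the real __tracepoint_str machinery), GCC typically rejects the second definition because a const object asks for a read-only section while the first object made the section writable:

    /* builds: a writable object in a writable named section */
    char ok_str[] __attribute__((section("demo_str"))) = "CB.Example";

    /* would fail with "causes a section type conflict":
     * const char bad_str[] __attribute__((section("demo_str"))) = "CB.Example";
     */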
Cc: dhowells@redhat.com Cc: viro@zeniv.linux.org.uk Signed-off-by: Andi Kleen --- fs/afs/cmservice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 8ee5972893ed5..2f8acb4c556d2 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -34,7 +34,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); static int afs_deliver_yfs_cb_callback(struct afs_call *); #define CM_NAME(name) \ - const char afs_SRXCB##name##_name[] __tracepoint_string = \ + char afs_SRXCB##name##_name[] __tracepoint_string = \ "CB." #name /* -- GitLab From 8022c4b95c3793d7ba28ab0701ea15b5deb46e02 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: [PATCH 1057/1861] afs: Differentiate abort due to unmarshalling from other errors Differentiate an abort due to an unmarshalling error from an abort due to other errors, such as ENETUNREACH. It doesn't make sense to set abort code RXGEN_*_UNMARSHAL in such a case, so use RX_USER_ABORT instead. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c14001b42d200..15c7e82d80cb3 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -572,13 +572,17 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENODATA: case -EBADMSG: case -EMSGSIZE: - default: abort_code = RXGEN_CC_UNMARSHAL; if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KUM"); goto local_abort; + default: + abort_code = RX_USER_ABORT; + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, ret, "KER"); + goto local_abort; } } -- GitLab From 21bd68f196ca91fc0f3d9bd1b32f6e530e8c1c88 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: [PATCH 1058/1861] afs: Unlock pages for __pagevec_release() __pagevec_release() complains loudly if any page in the vector is still locked. The pages need to be locked for generic_error_remove_page(), but that function doesn't actually unlock them. Unlock the pages afterwards. Signed-off-by: Marc Dionne Signed-off-by: David Howells Tested-by: Jonathan Billings --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index 72efcfcf9f95e..0122d7445fba1 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -264,6 +264,7 @@ static void afs_kill_pages(struct address_space *mapping, first = page->index + 1; lock_page(page); generic_error_remove_page(mapping, page); + unlock_page(page); } __pagevec_release(&pv); -- GitLab From eeba1e9cf31d064284dd1fa7bd6cfe01395bd03d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 13 Apr 2019 08:37:37 +0100 Subject: [PATCH 1059/1861] afs: Fix in-progress ops to ignore server-level callback invalidation The in-kernel afs filesystem client counts the number of server-level callback invalidation events (CB.InitCallBackState* RPC operations) that it receives from the server. This is stored in cb_s_break in various structures, including afs_server and afs_vnode. If an inode is examined by afs_validate(), say, the afs_server copy is compared, along with other break counters, to those in afs_vnode, and if one or more of the counters do not match, it is considered that the server's callback promise is broken. At points where this happens, AFS_VNODE_CB_PROMISED is cleared to indicate that the status must be refetched from the server.
afs_validate() issues an FS.FetchStatus operation to get updated metadata - and based on the updated data_version may invalidate the pagecache too. However, the break counters are also used to determine whether to note a new callback in the vnode (which would set the AFS_VNODE_CB_PROMISED flag) and whether to cache the permit data included in the YFSFetchStatus record by the server. The problem comes when the server sends us a CB.InitCallBackState op. The first such instance doesn't cause cb_s_break to be incremented, but rather causes AFS_SERVER_FL_NEW to be cleared - but thereafter, say some hours after last use and all the volumes have been automatically unmounted and the server has forgotten about the client[*], this *will* likely cause an increment. [*] There are other circumstances too, such as the server restarting or needing to make space in its callback table. Note that the server won't send us a CB.InitCallBackState op until we talk to it again. So what happens is: (1) A mount for a new volume is attempted, an inode is created for the root vnode and vnode->cb_s_break and AFS_VNODE_CB_PROMISED aren't set immediately, as we don't have a nominated server to talk to yet - and we may iterate through a few to find one. (2) Before the operation happens, afs_fetch_status(), say, notes in the cursor (fc.cb_break) the break counter sum from the vnode, volume and server counters, but the server->cb_s_break is currently 0. (3) We send FS.FetchStatus to the server. The server sends us back CB.InitCallBackState. We increment server->cb_s_break. (4) Our FS.FetchStatus completes. The reply includes a callback record. (5) xdr_decode_AFSCallBack()/xdr_decode_YFSCallBack() check to see whether the callback promise was broken by checking the break counter sum from step (2) against the current sum. This fails because of step (3), so we don't set the callback record and, importantly, don't set AFS_VNODE_CB_PROMISED on the vnode. This does not preclude the syscall from progressing, and we don't loop here rechecking the status, but rather assume it's good enough for one round only and will need to be rechecked next time. (6) afs_validate() is triggered on the vnode, probably called from d_revalidate() checking the parent directory. (7) afs_validate() notes that AFS_VNODE_CB_PROMISED isn't set, so doesn't update vnode->cb_s_break and assumes the vnode to be invalid. (8) afs_validate() needs to call afs_fetch_status(). Go back to step (2) and repeat, every time the vnode is validated. This primarily affects volume root dir vnodes. Everything subsequent to those inherits an already incremented cb_s_break upon mounting. The issue is that we assume that the callback record and the cached permit information in a reply from the server can't be trusted after getting a server break - but this is wrong since the server makes sure things are done in the right order, holding up our ops if necessary[*]. [*] There is an extremely unlikely scenario where a reply from before the CB.InitCallBackState could get its delivery deferred till after - at which point we think we have a promise when we don't. This, however, requires unlucky mass packet loss to one call. AFS_SERVER_FL_NEW tries to paper over the cracks for the initial mount from a server we've never contacted before, but this should be unnecessary. It's also further insulated from the problem on an initial mount by querying the server first with FS.GetCapabilities, which triggers the CB.InitCallBackState. Fix this by: (1) Remove AFS_SERVER_FL_NEW.
(2) In afs_calc_vnode_cb_break(), don't include cb_s_break in the calculation. (3) In afs_cb_is_broken(), don't include cb_s_break in the check. Signed-off-by: David Howells --- fs/afs/callback.c | 3 +-- fs/afs/internal.h | 4 +--- fs/afs/server.c | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 1c7955f5cdaf2..128f2dbe256a4 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -203,8 +203,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) */ void afs_init_callback_state(struct afs_server *server) { - if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags)) - server->cb_s_break++; + server->cb_s_break++; } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index bb1f244b2b3ac..3904ab0b95632 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -474,7 +474,6 @@ struct afs_server { time64_t put_time; /* Time at which last put */ time64_t update_at; /* Time at which to next update the record */ unsigned long flags; -#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */ #define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ #define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ #define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ @@ -827,7 +826,7 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) { - return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; + return vnode->cb_break + vnode->cb_v_break; } static inline bool afs_cb_is_broken(unsigned int cb_break, @@ -835,7 +834,6 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, const struct afs_cb_interest *cbi) { return !cbi || cb_break != (vnode->cb_break + - cbi->server->cb_s_break + vnode->volume->cb_v_break); } diff --git a/fs/afs/server.c b/fs/afs/server.c index 642afa2e9783c..65b33b6da48b9 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -226,7 +226,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; - server->flags = (1UL << AFS_SERVER_FL_NEW); server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); INIT_HLIST_HEAD(&server->cb_volumes); -- GitLab From 183ab39eb0ea9879bb68422a83e65f750f3192f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 13 Apr 2019 10:04:49 +0200 Subject: [PATCH 1060/1861] ALSA: hda: Initialize power_state field properly The recent commit 98081ca62cba ("ALSA: hda - Record the current power state before suspend/resume calls") made the HD-audio driver to store the PM state in power_state field. This forgot, however, the initialization at power up. Although the codec drivers usually don't need to refer to this field in the normal operation, let's initialize it properly for consistency. 
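Seen in isolation, the change just keeps the recorded PM state in step with the hardware state set at probe time; a rough sketch with made-up names (my_codec, my_set_power_state, MY_D0), not the actual HDA code:

    static void my_codec_power_up(struct my_codec *c)
    {
            my_set_power_state(c, MY_D0); /* hardware is now in D0 */
            /* Mirror that in the device's PM bookkeeping so later
             * suspend/resume comparisons start from a known value. */
            c->dev.power.power_state = PMSG_ON;
    }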
Fixes: 98081ca62cba ("ALSA: hda - Record the current power state before suspend/resume calls") Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index ec0b8595eb4da..701a69d856f5f 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -969,6 +969,7 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, /* power-up all before initialization */ hda_set_power_state(codec, AC_PWRST_D0); + codec->core.dev.power.power_state = PMSG_ON; snd_hda_codec_proc_new(codec); -- GitLab From becf2319f320cae43e20cf179cc51a355a0deb5f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Mar 2019 23:11:53 +0100 Subject: [PATCH 1061/1861] selftests: netfilter: check icmp pkttoobig errors are set as related When an icmp error such as pkttoobig is received, conntrack checks if the "inner" header (header of packet that did not fit link mtu) is matches an existing connection, and, if so, sets that packet as being related to the conntrack entry it found. It was recently reported that this "related" setting also works if the inner header is from another, different connection (i.e., artificial/forged icmp error). Add a test, followup patch will add additional "inner dst matches outer dst in reverse direction" check before setting related state. Link: https://www.synacktiv.com/posts/systems/icmp-reachable.html Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 2 +- .../netfilter/conntrack_icmp_related.sh | 283 ++++++++++++++++++ 2 files changed, 284 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/conntrack_icmp_related.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index c9ff2b47bd1ca..a37cb1192c6a6 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh new file mode 100755 index 0000000000000..b48e1833bc896 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? 
-ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + for i in 1 2;do ip netns del nsclient$i;done + for i in 1 2;do ip netns del nsrouter$i;done +} + +ipv4() { + echo -n 192.168.$1.2 +} + +ipv6 () { + echo -n dead:$1::2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + check_counter $n "unknown" "$expect" + if [ $? -ne 0 ] ;then + return 1 + fi + done + + return 0 +} + +for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do + ip netns add $n + ip -net $n link set lo up +done + +DEV=veth0 +ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 +DEV=veth0 +ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 + +DEV=veth0 +ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 + +DEV=veth0 +for i in 1 2; do + ip -net nsclient$i link set $DEV up + ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV + ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV +done + +ip -net nsrouter1 link set eth1 up +ip -net nsrouter1 link set veth0 up + +ip -net nsrouter2 link set eth1 up +ip -net nsrouter2 link set eth2 up + +ip -net nsclient1 route add default via 192.168.1.1 +ip -net nsclient1 -6 route add default via dead:1::1 + +ip -net nsclient2 route add default via 192.168.2.1 +ip -net nsclient2 route add default via dead:2::1 + +i=3 +ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 +ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 +ip -net nsrouter1 addr add dead:1::1/64 dev eth1 +ip -net nsrouter1 addr add dead:3::1/64 dev veth0 +ip -net nsrouter1 route add default via 192.168.3.10 +ip -net nsrouter1 -6 route add default via dead:3::10 + +ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 +ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 +ip -net nsrouter2 addr add dead:2::1/64 dev eth1 +ip -net nsrouter2 addr add dead:3::10/64 dev eth2 +ip -net nsrouter2 route add default via 192.168.3.1 +ip -net nsrouter2 route add default via dead:3::1 + +sleep 2 +for i in 4 6; do + ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in nsrouter1 nsrouter2; do +ip netns exec $netns nft -f - </dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi +ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null +if [ $? -ne 0 ]; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + cleanup + exit 1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in nsrouter1 nsrouter2 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +check_counter nsclient2 "new" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). 
+expect="packets 0 bytes 0" +check_counter "nsrouter2" "related" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null +if [ $? -eq 0 ]; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in nsrouter1 nsclient1;do + check_counter "$netns" "related" "$expect" + if [ $? -ne 0 ]; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +cleanup +exit $ret -- GitLab From 1025ce75212bf06d93910297a03ed6a4d41d8213 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 25 Mar 2019 23:11:54 +0100 Subject: [PATCH 1062/1861] netfilter: conntrack: don't set related state for different outer address Luca Moro says: ------ The issue lies in the filtering of ICMP and ICMPv6 errors that include an inner IP datagram. For these packets, icmp_error_message() extracts the ICMP error and inner layer to search for a known state. If a state is found, the packet is tagged as related (IP_CT_RELATED). The problem is that there is no correlation check between the inner and outer layer of the packet. So one can encapsulate an error with an inner layer matching a known state, while its outer layer is directed to a filtered host. In this case the whole packet will be tagged as related. This has various implications, from a rule bypass (if a rule allows related traffic) to a known state oracle. Unfortunately, we could not find a real statement in an RFC on how this case should be filtered. The closest we found is RFC5927 (Section 4.3) but it is not very clear. A possible fix would be to check that the inner IP source is the same as the outer destination. We believed this kind of attack was not documented yet, so we started to write a blog post about it. You can find it attached to this mail (sorry for the extract quality). It contains more technical details, PoC and discussion about the identified behavior. We discovered later that https://www.gont.com.ar/papers/filtering-of-icmp-error-messages.pdf described a similar attack concept in 2004 but without the stateful filtering in mind. ----- This implements the fix suggested above: in the icmp(v6) error handler, take the outer destination address, then pass that into the common function that does the "related" association. After obtaining the nf_conn of the matching inner-headers connection, check that the destination address of the opposite direction tuple is the same as the outer address, and only set RELATED if that's the case.
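In outline, the added validation does the following once the lookup on the inner headers has succeeded (condensed from the diff below):

    ct = nf_ct_tuplehash_to_ctrack(h);
    dir = NF_CT_DIRECTION(h);
    ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
    if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
            /* The outer destination is not the endpoint the matched
             * connection talks to: drop the reference and do not set
             * the RELATED state. */
            nf_ct_put(ct);
            return -NF_ACCEPT;
    }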
Reported-by: Luca Moro Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 6 ++ net/netfilter/nf_conntrack_proto_icmp.c | 93 +++++++++++++++----- net/netfilter/nf_conntrack_proto_icmpv6.c | 52 ++--------- 3 files changed, 84 insertions(+), 67 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 778087591983d..a49edfdf47e83 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -75,6 +75,12 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig); +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, + union nf_inet_addr *outer_daddr); + int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 7df477996b164..9becac9535873 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct, return NF_ACCEPT; } -/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ -static int -icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, - const struct nf_hook_state *state) +/* Check inner header is related to any of the existing connections */ +int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, + const struct nf_hook_state *state, + u8 l4proto, union nf_inet_addr *outer_daddr) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; + union nf_inet_addr *ct_daddr; + enum ip_conntrack_dir dir; + struct nf_conn *ct; WARN_ON(skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) + ip_hdrlen(skb) - + sizeof(struct icmphdr), - PF_INET, state->net, &origtuple)) { - pr_debug("icmp_error_message: failed to get tuple\n"); + if (!nf_ct_get_tuplepr(skb, dataoff, + state->pf, state->net, &origtuple)) return -NF_ACCEPT; - } /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple)) { - pr_debug("icmp_error_message: no match\n"); + if (!nf_ct_invert_tuple(&innertuple, &origtuple)) return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; h = nf_conntrack_find_get(state->net, zone, &innertuple); - if (!h) { - pr_debug("icmp_error_message: no match\n"); + if (!h) + return -NF_ACCEPT; + + /* Consider: A -> T (=This machine) -> B + * Conntrack entry will look like this: + * Original: A->B + * Reply: B->T (SNAT case) OR A + * + * When this function runs, we got packet that looks like this: + * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..). + * + * Above nf_conntrack_find_get() makes lookup based on inner_hdr, + * so we should expect that destination of the found connection + * matches outer header destination address. + * + * In above example, we can consider these two cases: + * 1. Error coming in reply direction from B or M (middle box) to + * T (SNAT case) or A. 
+ * Inner saddr will be B, dst will be T or A. + * The found conntrack will be reply tuple (B->T/A). + * 2. Error coming in original direction from A or M to B. + * Inner saddr will be A, inner daddr will be B. + * The found conntrack will be original tuple (A->B). + * + * In both cases, conntrack[dir].dst == inner.dst. + * + * A bogus packet could look like this: + * Inner: B->T + * Outer: B->X (other machine reachable by T). + * + * In this case, lookup yields connection A->B and will + * set packet from B->X as *RELATED*, even though no connection + * from X was ever seen. + */ + ct = nf_ct_tuplehash_to_ctrack(h); + dir = NF_CT_DIRECTION(h); + ct_daddr = &ct->tuplehash[dir].tuple.dst.u3; + if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) { + if (state->pf == AF_INET) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI4 != inner %pI4", + &outer_daddr->ip, &ct_daddr->ip); + } else if (state->pf == AF_INET6) { + nf_l4proto_log_invalid(skb, state->net, state->pf, + l4proto, + "outer daddr %pI6 != inner %pI6", + &outer_daddr->ip6, &ct_daddr->ip6); + } + nf_ct_put(ct); return -NF_ACCEPT; } - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + ctinfo = IP_CT_RELATED; + if (dir == IP_CT_DIR_REPLY) ctinfo += IP_CT_IS_REPLY; /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); + nf_ct_set(skb, ct, ctinfo); return NF_ACCEPT; } @@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmphdr *icmph; struct icmphdr _ih; /* Not enough header? */ - icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); + icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmph == NULL) { icmp_error_log(skb, state, "short packet"); return -NF_ACCEPT; @@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(tmpl, skb, state); + memset(&outer_daddr, 0, sizeof(outer_daddr)); + outer_daddr.ip = ip_hdr(skb)->daddr; + + dataoff += sizeof(*icmph); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMP, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index bec4a32116585..c63ee36128555 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct, return NF_ACCEPT; } -static int -icmpv6_error_message(struct net *net, struct nf_conn *tmpl, - struct sk_buff *skb, - unsigned int icmp6off) -{ - struct nf_conntrack_tuple intuple, origtuple; - const struct nf_conntrack_tuple_hash *h; - enum ip_conntrack_info ctinfo; - struct nf_conntrack_zone tmp; - - WARN_ON(skb_nfct(skb)); - - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuplepr(skb, - skb_network_offset(skb) - + sizeof(struct ipv6hdr) - + sizeof(struct icmp6hdr), - PF_INET6, net, &origtuple)) { - pr_debug("icmpv6_error: Can't get tuple\n"); - return -NF_ACCEPT; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. 
*/ - if (!nf_ct_invert_tuple(&intuple, &origtuple)) { - pr_debug("icmpv6_error: Can't invert tuple\n"); - return -NF_ACCEPT; - } - - ctinfo = IP_CT_RELATED; - - h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), - &intuple); - if (!h) { - pr_debug("icmpv6_error: no match\n"); - return -NF_ACCEPT; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); - return NF_ACCEPT; -} static void icmpv6_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, @@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, unsigned int dataoff, const struct nf_hook_state *state) { + union nf_inet_addr outer_daddr; const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; int type; @@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(state->net, tmpl, skb, dataoff); + memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, + sizeof(outer_daddr.ip6)); + dataoff += sizeof(*icmp6h); + return nf_conntrack_inet_error(tmpl, skb, dataoff, state, + IPPROTO_ICMPV6, &outer_daddr); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) -- GitLab From 8176c8332751bf27597488d6e45c9b8f530593bf Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 28 Mar 2019 10:47:20 +0100 Subject: [PATCH 1063/1861] netfilter: conntrack: initialize ct->timeout KMSAN started reporting an error when accessing ct->timeout for the first time without initialization: BUG: KMSAN: uninit-value in __nf_ct_refresh_acct+0x1ae/0x470 net/netfilter/nf_conntrack_core.c:1765 ... dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x131/0x2a0 mm/kmsan/kmsan.c:624 __msan_warning+0x7a/0xf0 mm/kmsan/kmsan_instr.c:310 __nf_ct_refresh_acct+0x1ae/0x470 net/netfilter/nf_conntrack_core.c:1765 nf_ct_refresh_acct ./include/net/netfilter/nf_conntrack.h:201 nf_conntrack_udp_packet+0xb44/0x1040 net/netfilter/nf_conntrack_proto_udp.c:122 nf_conntrack_handle_packet net/netfilter/nf_conntrack_core.c:1605 nf_conntrack_in+0x1250/0x26c9 net/netfilter/nf_conntrack_core.c:1696 ... 
Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159 kmsan_kmalloc+0xa9/0x130 mm/kmsan/kmsan_hooks.c:173 kmem_cache_alloc+0x554/0xb10 mm/slub.c:2789 __nf_conntrack_alloc+0x16f/0x690 net/netfilter/nf_conntrack_core.c:1342 init_conntrack+0x6cb/0x2490 net/netfilter/nf_conntrack_core.c:1421 Signed-off-by: Alexander Potapenko Fixes: cc16921351d8ba1 ("netfilter: conntrack: avoid same-timeout update") Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 82bfbeef46afa..a137d4e7f218c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1350,6 +1350,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; + ct->timeout = 0; write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - -- GitLab From 0261ea1bd1eb0da5c0792a9119b8655cf33c80a3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 31 Mar 2019 13:24:52 +0300 Subject: [PATCH 1064/1861] ipvs: do not schedule icmp errors from tunnels We can receive ICMP errors from client or from tunneling real server. While the former can be scheduled to real server, the latter should not be scheduled, they are decapsulated only when existing connection is found. Fixes: 6044eeffafbe ("ipvs: attempt to schedule icmp packets") Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 43bbaa32b1d65..14457551bcb4e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v; - if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) -- GitLab From 06058632464845abb1af91521122fd04dd3daaec Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Apr 2019 09:26:03 -0600 Subject: [PATCH 1065/1861] io_uring: park SQPOLL thread if it's percpu kthread expects this, or we can throw a warning on exit: WARNING: CPU: 0 PID: 7822 at kernel/kthread.c:399 __kthread_bind_mask+0x3b/0xc0 kernel/kthread.c:399 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 0 PID: 7822 Comm: syz-executor030 Not tainted 5.1.0-rc4-next-20190412 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x72b kernel/panic.c:214 __warn.cold+0x20/0x46 kernel/panic.c:576 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:__kthread_bind_mask+0x3b/0xc0 kernel/kthread.c:399 Code: 48 89 fb e8 f7 ab 24 00 4c 89 e6 48 89 df e8 ac e1 02 00 31 ff 49 89 c4 48 89 c6 e8 7f ad 24 00 4d 85 e4 75 15 e8 d5 ab 24 00 <0f> 0b e8 ce ab 24 00 5b 41 5c 41 5d 41 5e 5d c3 e8 c0 ab 24 00 4c RSP: 0018:ffff8880a89bfbb8 EFLAGS: 00010293 RAX: ffff88808ca7a280 RBX: ffff8880a98e4380 RCX: ffffffff814bdd11 RDX: 0000000000000000 RSI: ffffffff814bdd1b RDI: 0000000000000007 RBP: ffff8880a89bfbd8 R08: ffff88808ca7a280 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffffffff87691148 R14: ffff8880a98e43a0 R15: ffffffff81c91e10 __kthread_bind kernel/kthread.c:412 [inline] kthread_unpark+0x123/0x160 kernel/kthread.c:480 kthread_stop+0xfa/0x6c0 kernel/kthread.c:556 io_sq_thread_stop fs/io_uring.c:2057 [inline] io_sq_thread_stop fs/io_uring.c:2052 [inline] io_finish_async+0xab/0x180 fs/io_uring.c:2064 io_ring_ctx_free fs/io_uring.c:2534 [inline] io_ring_ctx_wait_and_kill+0x133/0x510 fs/io_uring.c:2591 io_uring_release+0x42/0x50 fs/io_uring.c:2599 __fput+0x2e5/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x90a/0x2fa0 kernel/exit.c:876 do_group_exit+0x135/0x370 kernel/exit.c:980 __do_sys_exit_group kernel/exit.c:991 [inline] __se_sys_exit_group kernel/exit.c:989 [inline] __x64_sys_exit_group+0x44/0x50 kernel/exit.c:989 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: syzbot+6d4a92619eb0ad08602b@syzkaller.appspotmail.com Fixes: 6c271ce2f1d5 ("io_uring: add submission polling") Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 89aa8412b5f59..e5008c1b82be9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1920,6 +1920,10 @@ static int io_sq_thread(void *data) unuse_mm(cur_mm); mmput(cur_mm); } + + if (kthread_should_park()) + kthread_parkme(); + return 0; } @@ -2054,6 +2058,7 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx) if (ctx->sqo_thread) { ctx->sqo_stop = 1; mb(); + kthread_park(ctx->sqo_thread); kthread_stop(ctx->sqo_thread); ctx->sqo_thread = NULL; } -- GitLab From 917257daa0fea7a007102691c0e27d9216a96768 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Apr 2019 09:28:55 -0600 Subject: [PATCH 1066/1861] io_uring: only test SQPOLL cpu after we've verified it We currently call cpu_possible() even if we don't use the CPU. Move the test under the SQ_AFF branch, which is the only place where we'll use the value. Do the cpu_possible() test AFTER we've limited it to a max of NR_CPUS. This avoids triggering the following warning: WARNING: CPU: 1 PID: 7600 at include/linux/cpumask.h:121 cpu_max_bits_warn if CONFIG_DEBUG_PER_CPU_MAPS is enabled. 
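In code terms, the move condenses to the shape below (assembled from the hunk that follows, with surrounding details elided):

	if (p->flags & IORING_SETUP_SQ_AFF) {
		int cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS);

		ret = -EINVAL;
		if (!cpu_possible(p->sq_thread_cpu))
			goto err;

		ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
							ctx, cpu,
							"io_uring-sq");
	}

The validity test now runs only on the path that actually uses the CPU value.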
While in there, also move the SQ thread idle period assignment inside SETUP_SQPOLL, as we don't use it otherwise either. Reported-by: syzbot+cd714a07c6de2bc34293@syzkaller.appspotmail.com Fixes: 6c271ce2f1d5 ("io_uring: add submission polling") Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e5008c1b82be9..24355e0c47f05 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2241,10 +2241,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, mmgrab(current->mm); ctx->sqo_mm = current->mm; - ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); - if (!ctx->sq_thread_idle) - ctx->sq_thread_idle = HZ; - ret = -EINVAL; if (!cpu_possible(p->sq_thread_cpu)) goto err; @@ -2254,10 +2250,18 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, if (!capable(CAP_SYS_ADMIN)) goto err; + ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); + if (!ctx->sq_thread_idle) + ctx->sq_thread_idle = HZ; + if (p->flags & IORING_SETUP_SQ_AFF) { int cpu; cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS); + ret = -EINVAL; + if (!cpu_possible(p->sq_thread_cpu)) + goto err; + ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread, ctx, cpu, "io_uring-sq"); -- GitLab From 77f1e0a52d26242b6c2dba019f6ebebfb9ff701e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Jan 2019 10:34:16 -0700 Subject: [PATCH 1067/1861] bfq: update internal depth state when queue depth changes A previous commit moved the shallow depth and BFQ depth map calculations to be done at init time, moving it outside of the hotter IO path. This potentially causes hangs if the users changes the depth of the scheduler map, by writing to the 'nr_requests' sysfs file for that device. Add a blk-mq-sched hook that allows blk-mq to inform the scheduler if the depth changes, so that the scheduler can update its internal state. 
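Condensed from the hunks below, the wiring looks like this: blk-mq invokes the new callback whenever nr_requests changes, and BFQ implements it by recomputing its shallow-depth limits (one elided local declaration is restored here for readability):

	/* block/blk-mq.c, when the queue depth is updated */
	if (q->elevator && q->elevator->type->ops.depth_updated)
		q->elevator->type->ops.depth_updated(hctx);

	/* block/bfq-iosched.c */
	static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
	{
		struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
		struct blk_mq_tags *tags = hctx->sched_tags;
		unsigned int min_shallow;

		min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
		sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
	}

bfq_init_hctx() then becomes a trivial wrapper around the same helper.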
Tested-by: Kai Krakow Reported-by: Paolo Valente Fixes: f0635b8a416e ("bfq: calculate shallow depths at init time") Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 8 +++++++- block/blk-mq.c | 2 ++ include/linux/elevator.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index dfb8cb0af13a8..5ba1e0d841b4d 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5396,7 +5396,7 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, return min_shallow; } -static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; @@ -5404,6 +5404,11 @@ static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags); sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow); +} + +static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index) +{ + bfq_depth_updated(hctx); return 0; } @@ -5826,6 +5831,7 @@ static struct elevator_type iosched_bfq_mq = { .requests_merged = bfq_requests_merged, .request_merged = bfq_request_merged, .has_work = bfq_has_work, + .depth_updated = bfq_depth_updated, .init_hctx = bfq_init_hctx, .init_sched = bfq_init_queue, .exit_sched = bfq_exit_queue, diff --git a/block/blk-mq.c b/block/blk-mq.c index 9516304a38ee3..fc60ed7e940ea 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3135,6 +3135,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) } if (ret) break; + if (q->elevator && q->elevator->type->ops.depth_updated) + q->elevator->type->ops.depth_updated(hctx); } if (!ret) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2e9e2763bf47d..6e8bc53740f05 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -31,6 +31,7 @@ struct elevator_mq_ops { void (*exit_sched)(struct elevator_queue *); int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); + void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); -- GitLab From 3d6770fbd9353988839611bab107e4e891506aad Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Apr 2019 11:50:54 -0600 Subject: [PATCH 1068/1861] io_uring: drop io_file_put() 'file' argument Since the fget/fput handling was reworked in commit 09bb839434bd, we never call io_file_put() with state == NULL (and hence file != NULL) anymore. Remove that case. 
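With the NULL-state case gone, the helper reduces to the batched-reference release. A sketch of the resulting function (the tail that drops the surplus references sits outside the hunks below and is reconstructed here, so treat it as illustrative):

	static void io_file_put(struct io_submit_state *state)
	{
		if (state->file) {
			int diff = state->has_refs - state->used_refs;

			if (diff)
				fput_many(state->file, diff);
			state->file = NULL;
		}
	}

All remaining callers simply pass the submit state, e.g. io_submit_state_end() now calls io_file_put(state).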
Reported-by: Al Viro Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 24355e0c47f05..f4ddb9d232414 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -682,11 +682,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req) list_add_tail(&req->list, &ctx->poll_list); } -static void io_file_put(struct io_submit_state *state, struct file *file) +static void io_file_put(struct io_submit_state *state) { - if (!state) { - fput(file); - } else if (state->file) { + if (state->file) { int diff = state->has_refs - state->used_refs; if (diff) @@ -711,7 +709,7 @@ static struct file *io_file_get(struct io_submit_state *state, int fd) state->ios_left--; return state->file; } - io_file_put(state, NULL); + io_file_put(state); } state->file = fget_many(fd, state->ios_left); if (!state->file) @@ -1671,7 +1669,7 @@ out: static void io_submit_state_end(struct io_submit_state *state) { blk_finish_plug(&state->plug); - io_file_put(state, NULL); + io_file_put(state); if (state->free_reqs) kmem_cache_free_bulk(req_cachep, state->free_reqs, &state->reqs[state->cur_req]); -- GitLab From 26536e7c242e2b0f73c25c46fc50d2525ebe400b Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 13 Apr 2019 13:22:44 -0400 Subject: [PATCH 1069/1861] locking/rwsem: Prevent unneeded warning during locking selftest Disable the DEBUG_RWSEMS check when locking selftest is running with debug_locks_silent flag set. Signed-off-by: Waiman Long Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Link: http://lkml.kernel.org/r/20190413172259.2740-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 37db17890e36a..64877f5294e35 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -30,7 +30,8 @@ #ifdef CONFIG_DEBUG_RWSEMS # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ - if (WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + if (!debug_locks_silent && \ + WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ #c, atomic_long_read(&(sem)->count), \ (long)((sem)->owner), (long)current, \ list_empty(&(sem)->wait_list) ? "" : "not ")) \ -- GitLab From 40fba00ffa431c8597ca785ea1cfa4d9f6503390 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 10 Apr 2019 03:53:49 +0800 Subject: [PATCH 1070/1861] x86/resctrl: Do not repeat rdtgroup mode initialization When cache allocation is supported and the user creates a new resctrl resource group, the allocations of the new resource group are initialized to all regions that it can possibly use. At this time these regions are all that are shareable by other resource groups as well as regions that are not currently used. The new resource group's mode is also initialized to reflect this initialization and set to "shareable". The new resource group's mode is currently repeatedly initialized within the loop that configures the hardware with the resource group's default allocations. Move the initialization of the resource group's mode outside the hardware configuration loop. The resource group's mode is now initialized only once as the final step to reflect that its configured allocations are "shareable". 
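Schematically, the fix hoists a loop-invariant assignment out of the configuration loop (identifiers other than rdtgrp->mode and RDT_MODE_SHAREABLE are illustrative, not the exact resctrl code):

	for_each_alloc_enabled_rdt_resource(r) {
		ret = init_resource_allocations(rdtgrp, r); /* illustrative helper */
		if (ret)
			return ret;
		/* was here: rdtgrp->mode = RDT_MODE_SHAREABLE; */
	}
	rdtgrp->mode = RDT_MODE_SHAREABLE;	/* set once, after all resources */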
Fixes: 95f0b77efa57 ("x86/intel_rdt: Initialize new resource group with sane defaults") Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Fenghua Yu Acked-by: Reinette Chatre Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1554839629-5448-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 54b9eef3eea97..85212a32b54df 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; } - rdtgrp->mode = RDT_MODE_SHAREABLE; } + rdtgrp->mode = RDT_MODE_SHAREABLE; + return 0; } -- GitLab From f958d7b528b1b40c44cfda5eabe2d82760d868c3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:06:20 -0700 Subject: [PATCH 1071/1861] mm: make page ref count overflow check tighter and more explicit We have a VM_BUG_ON() to check that the page reference count doesn't underflow (or get close to overflow) by checking the sign of the count. That's all fine, but we actually want to allow people to use a "get page ref unless it's already very high" helper function, and we want that one to use the sign of the page ref (without triggering this VM_BUG_ON). Change the VM_BUG_ON to only check for small underflows (or _very_ close to overflowing), and ignore overflows which have strayed into negative territory. Acked-by: Matthew Wilcox Cc: Jann Horn Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 80bb6408fe73a..541d99b86aea9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -965,6 +965,10 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } #endif /* CONFIG_DEV_PAGEMAP_OPS */ +/* 127: arbitrary random number, small enough to assemble well */ +#define page_ref_zero_or_close_to_overflow(page) \ + ((unsigned int) page_ref_count(page) + 127u <= 127u) + static inline void get_page(struct page *page) { page = compound_head(page); @@ -972,7 +976,7 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_refcount. */ - VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); + VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); page_ref_inc(page); } -- GitLab From 88b1a17dfc3ed7728316478fae0f5ad508f50397 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:14:59 -0700 Subject: [PATCH 1072/1861] mm: add 'try_get_page()' helper function This is the same as the traditional 'get_page()' function, but instead of unconditionally incrementing the reference count of the page, it only does so if the count was "safe". It returns whether the reference count was incremented (and is marked __must_check, since the caller obviously has to be aware of it). Also like 'get_page()', you can't use this function unless you already had a reference to the page. The intent is that you can use this exactly like get_page(), but in situations where you want to limit the maximum reference count. 
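A typical call site then follows this pattern (a sketch; the -ENOMEM value mirrors the gup conversions later in the series):

	if (unlikely(!try_get_page(page)))
		return ERR_PTR(-ENOMEM);	/* count was zero or saturated */
	/* ... use the page, then drop the reference with put_page() ... */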
The code currently does an unconditional WARN_ON_ONCE() if we ever hit the reference count issues (either zero or negative), as a notification that the conditional non-increment actually happened. NOTE! The count access for the "safety" check is inherently racy, but that doesn't matter since the buffer we use is basically half the range of the reference count (ie we look at the sign of the count). Acked-by: Matthew Wilcox Cc: Jann Horn Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 541d99b86aea9..7000ddd807e01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -980,6 +980,15 @@ static inline void get_page(struct page *page) page_ref_inc(page); } +static inline __must_check bool try_get_page(struct page *page) +{ + page = compound_head(page); + if (WARN_ON_ONCE(page_ref_count(page) <= 0)) + return false; + page_ref_inc(page); + return true; +} + static inline void put_page(struct page *page) { page = compound_head(page); -- GitLab From 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 11 Apr 2019 10:49:19 -0700 Subject: [PATCH 1073/1861] mm: prevent get_user_pages() from overflowing page refcount If the page refcount wraps around past zero, it will be freed while there are still four billion references to it. One of the possible avenues for an attacker to try to make this happen is by doing direct IO on a page multiple times. This patch makes get_user_pages() refuse to take a new page reference if there are already more than two billion references to the page. Reported-by: Jann Horn Acked-by: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/gup.c | 48 ++++++++++++++++++++++++++++++++++++------------ mm/hugetlb.c | 13 +++++++++++++ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 75029649baca4..81e0bdefa2ccb 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -157,8 +157,12 @@ retry: goto retry; } - if (flags & FOLL_GET) - get_page(page); + if (flags & FOLL_GET) { + if (unlikely(!try_get_page(page))) { + page = ERR_PTR(-ENOMEM); + goto out; + } + } if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) @@ -295,7 +299,10 @@ retry_locked: if (pmd_trans_unstable(pmd)) ret = -EBUSY; } else { - get_page(page); + if (unlikely(!try_get_page(page))) { + spin_unlock(ptl); + return ERR_PTR(-ENOMEM); + } spin_unlock(ptl); lock_page(page); ret = split_huge_page(page); @@ -497,7 +504,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, if (is_device_public_page(*page)) goto unmap; } - get_page(*page); + if (unlikely(!try_get_page(*page))) { + ret = -ENOMEM; + goto unmap; + } out: ret = 0; unmap: @@ -1393,6 +1403,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) } } +/* + * Return the compound head page with ref appropriately incremented, + * or NULL if that failed. 
+ */ +static inline struct page *try_get_compound_head(struct page *page, int refs) +{ + struct page *head = compound_head(page); + if (WARN_ON_ONCE(page_ref_count(head) < 0)) + return NULL; + if (unlikely(!page_cache_add_speculative(head, refs))) + return NULL; + return head; +} + #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) @@ -1427,9 +1451,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) + head = try_get_compound_head(page, 1); + if (!head) goto pte_unmap; if (unlikely(pte_val(pte) != pte_val(*ptep))) { @@ -1568,8 +1592,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pmd_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pmd_page(orig), refs); + if (!head) { *nr -= refs; return 0; } @@ -1606,8 +1630,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pud_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pud_page(orig), refs); + if (!head) { *nr -= refs; return 0; } @@ -1643,8 +1667,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - head = compound_head(pgd_page(orig)); - if (!page_cache_add_speculative(head, refs)) { + head = try_get_compound_head(pgd_page(orig), refs); + if (!head) { *nr -= refs; return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8dfdffc34a99b..c220315dc5331 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4298,6 +4298,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT; page = pte_page(huge_ptep_get(pte)); + + /* + * Instead of doing 'try_get_page()' below in the same_page + * loop, just check the count once here. + */ + if (unlikely(page_count(page) <= 0)) { + if (pages) { + spin_unlock(ptl); + remainder = 0; + err = -ENOMEM; + break; + } + } same_page: if (pages) { pages[i] = mem_map_offset(page, pfn_offset); -- GitLab From 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 5 Apr 2019 14:02:10 -0700 Subject: [PATCH 1074/1861] fs: prevent page refcount overflow in pipe_buf_get Change pipe_buf_get() to return a bool indicating whether it succeeded in raising the refcount of the page (if the thing in the pipe is a page). This removes another mechanism for overflowing the page refcount. All callers converted to handle a failure. 
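The converted callers all follow the same shape, condensed here from the splice.c hunks below:

	if (!pipe_buf_get(ipipe, ibuf)) {
		if (ret == 0)
			ret = -EFAULT;
		break;
	}
	*obuf = *ibuf;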
Reported-by: Jann Horn Signed-off-by: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 12 ++++++------ fs/pipe.c | 4 ++-- fs/splice.c | 12 ++++++++++-- include/linux/pipe_fs_i.h | 10 ++++++---- kernel/trace/trace.c | 6 +++++- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 809c0f2f9942e..64f4de9834687 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2034,10 +2034,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len; ret = -EINVAL; - if (rem < len) { - pipe_unlock(pipe); - goto out; - } + if (rem < len) + goto out_free; rem = len; while (rem) { @@ -2055,7 +2053,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; } else { - pipe_buf_get(pipe, ibuf); + if (!pipe_buf_get(pipe, ibuf)) + goto out_free; + *obuf = *ibuf; obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->len = rem; @@ -2078,11 +2078,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, ret = fuse_dev_do_write(fud, &cs, len); pipe_lock(pipe); +out_free: for (idx = 0; idx < nbuf; idx++) pipe_buf_release(pipe, &bufs[idx]); pipe_unlock(pipe); -out: kvfree(bufs); return ret; } diff --git a/fs/pipe.c b/fs/pipe.c index bdc5d3c0977d0..b1543b85c14a4 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -189,9 +189,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal); * in the tee() system call, when we duplicate the buffers in one * pipe into another. */ -void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) +bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - get_page(buf->page); + return try_get_page(buf->page); } EXPORT_SYMBOL(generic_pipe_buf_get); diff --git a/fs/splice.c b/fs/splice.c index de2ede048473c..f30af82b850dd 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1588,7 +1588,11 @@ retry: * Get a reference to this pipe buffer, * so we can copy the contents over. */ - pipe_buf_get(ipipe, ibuf); + if (!pipe_buf_get(ipipe, ibuf)) { + if (ret == 0) + ret = -EFAULT; + break; + } *obuf = *ibuf; /* @@ -1660,7 +1664,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, * Get a reference to this pipe buffer, * so we can copy the contents over. */ - pipe_buf_get(ipipe, ibuf); + if (!pipe_buf_get(ipipe, ibuf)) { + if (ret == 0) + ret = -EFAULT; + break; + } obuf = opipe->bufs + nbuf; *obuf = *ibuf; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5a3bb3b7c9ad3..3f2a42c11e206 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -108,18 +108,20 @@ struct pipe_buf_operations { /* * Get a reference to the pipe buffer. */ - void (*get)(struct pipe_inode_info *, struct pipe_buffer *); + bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to + * + * Return: %true if the reference was successfully obtained. 
*/ -static inline void pipe_buf_get(struct pipe_inode_info *pipe, +static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - buf->ops->get(pipe, buf); + return buf->ops->get(pipe, buf); } /** @@ -178,7 +180,7 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); +bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c4238b4416244..0f300d488c9ff 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6835,12 +6835,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, buf->private = 0; } -static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, +static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; + if (ref->ref > INT_MAX/2) + return false; + ref->ref++; + return true; } /* Pipe buffer operations for a buffer. */ -- GitLab From fdc7833964d83b7f7f39a03e2ee48a229ba0291f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:27:55 +0200 Subject: [PATCH 1075/1861] um/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Richard Weinberger Cc: linux-um@lists.infradead.org Link: https://lkml.kernel.org/r/20190410103643.662853876@linutronix.de --- arch/um/kernel/stacktrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/um/kernel/stacktrace.c b/arch/um/kernel/stacktrace.c index ebe7bcf62684c..bd95e020d5091 100644 --- a/arch/um/kernel/stacktrace.c +++ b/arch/um/kernel/stacktrace.c @@ -63,8 +63,6 @@ static const struct stacktrace_ops dump_ops = { static void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace) { dump_trace(tsk, &dump_ops, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace(struct stack_trace *trace) -- GitLab From c5c27a0a583844c69a433039e4fd6396ba23551b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:27:56 +0200 Subject: [PATCH 1076/1861] x86/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. 
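The idiom being deleted, here and in the follow-up patches for the other architectures, is always the same two lines:

	if (trace->nr_entries < trace->max_entries)
		trace->entries[trace->nr_entries++] = ULONG_MAX;

Consumers are expected to honor trace->nr_entries (or stop at a zeroed entry) rather than scan for a magic terminator.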
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Link: https://lkml.kernel.org/r/20190410103643.750954603@linutronix.de --- arch/x86/kernel/stacktrace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 5c2d71a1dc069..b2f706f1e0b7a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -46,9 +46,6 @@ static void noinline __save_stack_trace(struct stack_trace *trace, if (!addr || save_stack_address(trace, addr, nosched)) break; } - - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } /* @@ -97,7 +94,7 @@ __save_stack_trace_reliable(struct stack_trace *trace, if (regs) { /* Success path for user tasks */ if (user_mode(regs)) - goto success; + return 0; /* * Kernel mode registers on the stack indicate an @@ -132,10 +129,6 @@ __save_stack_trace_reliable(struct stack_trace *trace, if (!(task->flags & (PF_KTHREAD | PF_IDLE))) return -EINVAL; -success: - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; - return 0; } @@ -221,9 +214,6 @@ void save_stack_trace_user(struct stack_trace *trace) /* * Trace user stack if we are not a kernel thread */ - if (current->mm) { + if (current->mm) __save_stack_trace_user(trace); - } - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } -- GitLab From 2a2bcfa0c94d8bc4770676a6799928036296c037 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:27:57 +0200 Subject: [PATCH 1077/1861] arm/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r/20190410103643.843075256@linutronix.de --- arch/arm/kernel/stacktrace.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index a56e7c856ab56..86870f40f9a07 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -115,8 +115,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk, * running on another CPU? For now, ignore it as we * can't guarantee we won't explode. 
*/ - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; return; #else frame.fp = thread_saved_fp(tsk); @@ -134,8 +132,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk, } walk_stackframe(&frame, save_trace, &data); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) @@ -153,8 +149,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) frame.pc = regs->ARM_pc; walk_stackframe(&frame, save_trace, &data); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -- GitLab From b01f6d368d296cac099383a3eb200e135420f885 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:27:58 +0200 Subject: [PATCH 1078/1861] sh/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Rich Felker Cc: Yoshinori Sato Cc: Kuninori Morimoto Cc: linux-sh@vger.kernel.org Cc: Simon Horman Link: https://lkml.kernel.org/r/20190410103643.932464393@linutronix.de --- arch/sh/kernel/stacktrace.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c index f3cb2cccb2624..2950b19ad0772 100644 --- a/arch/sh/kernel/stacktrace.c +++ b/arch/sh/kernel/stacktrace.c @@ -49,8 +49,6 @@ void save_stack_trace(struct stack_trace *trace) unsigned long *sp = (unsigned long *)current_stack_pointer; unwind_stack(current, NULL, sp, &save_stack_ops, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); @@ -84,7 +82,5 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) unsigned long *sp = (unsigned long *)tsk->thread.sp; unwind_stack(current, NULL, sp, &save_stack_ops_nosched, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); -- GitLab From f8a9a269c28ddd5d741e747ceca753af01c828f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:27:59 +0200 Subject: [PATCH 1079/1861] unicore32/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Guan Xuetao Link: https://lkml.kernel.org/r/20190410103644.036077691@linutronix.de --- arch/unicore32/kernel/stacktrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c index 9976e767d51c2..e37da8c6837be 100644 --- a/arch/unicore32/kernel/stacktrace.c +++ b/arch/unicore32/kernel/stacktrace.c @@ -120,8 +120,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) } walk_stackframe(&frame, save_trace, &data); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace(struct stack_trace *trace) -- GitLab From fa9833992d5ff3c0d6e81d708bec363bce2fb54c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:00 +0200 Subject: [PATCH 1080/1861] riscv/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: linux-riscv@lists.infradead.org Cc: Palmer Dabbelt Cc: Albert Ou Link: https://lkml.kernel.org/r/20190410103644.131061192@linutronix.de --- arch/riscv/kernel/stacktrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index a4b1d94371a0d..4d403274c2e8d 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -169,8 +169,6 @@ static bool save_trace(unsigned long pc, void *arg) void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { walk_stackframe(tsk, NULL, save_trace, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); -- GitLab From 7b2c7b6233497bfab8826ece574bc1c26e97478d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:01 +0200 Subject: [PATCH 1081/1861] arm64/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r/20190410103644.220247845@linutronix.de --- arch/arm64/kernel/stacktrace.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d908b5e9e949c..b00ec7d483d1c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -140,8 +140,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) #endif walk_stackframe(current, &frame, save_trace, &data); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_regs); @@ -172,8 +170,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk, #endif walk_stackframe(tsk, &frame, save_trace, &data); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; put_task_stack(tsk); } -- GitLab From 4f3bd6ca310b594df09c8f1e319cda9baf502ec8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:02 +0200 Subject: [PATCH 1082/1861] parisc/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Link: https://lkml.kernel.org/r/20190410103644.308534788@linutronix.de --- arch/parisc/kernel/stacktrace.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c index ec5835e83a7a7..6f0b9c8d80523 100644 --- a/arch/parisc/kernel/stacktrace.c +++ b/arch/parisc/kernel/stacktrace.c @@ -29,22 +29,17 @@ static void dump_trace(struct task_struct *task, struct stack_trace *trace) } } - /* * Save stack-backtrace addresses into a stack_trace buffer. */ void save_stack_trace(struct stack_trace *trace) { dump_trace(current, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { dump_trace(tsk, trace); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); -- GitLab From 6a28b4c2d93b812512d8d2e5179e61a14f578560 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:03 +0200 Subject: [PATCH 1083/1861] s390/stacktrace: Remove the pointless ULONG_MAX marker Terminating the last trace entry with ULONG_MAX is a completely pointless exercise and none of the consumers can rely on it because it's inconsistently implemented across architectures. In fact quite some of the callers remove the entry and adjust stack_trace.nr_entries afterwards. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Martin Schwidefsky Cc: linux-s390@vger.kernel.org Cc: Heiko Carstens Link: https://lkml.kernel.org/r/20190410103644.396788431@linutronix.de --- arch/s390/kernel/stacktrace.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 460dcfba7d4ec..cc9ed97870683 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -45,8 +45,6 @@ void save_stack_trace(struct stack_trace *trace) sp = current_stack_pointer(); dump_trace(save_address, trace, NULL, sp); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); @@ -58,8 +56,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (tsk == current) sp = current_stack_pointer(); dump_trace(save_address_nosched, trace, tsk, sp); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); @@ -69,7 +65,5 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) sp = kernel_stack_pointer(regs); dump_trace(save_address, trace, NULL, sp); - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_regs); -- GitLab From 2dfed4565afe263751d2451ad22336ad806c25a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:04 +0200 Subject: [PATCH 1084/1861] lockdep: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. Remove the cruft. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Will Deacon Link: https://lkml.kernel.org/r/20190410103644.485737321@linutronix.de --- kernel/locking/lockdep.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e16766ff184b5..2edf9501d9065 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -444,17 +444,6 @@ static int save_trace(struct stack_trace *trace) save_stack_trace(trace); - /* - * Some daft arches put -1 at the end to indicate its a full trace. - * - * this is buggy anyway, since it takes a whole extra entry so a - * complete trace that maxes out the entries provided will be reported - * as incomplete, friggin useless - */ - if (trace->nr_entries != 0 && - trace->entries[trace->nr_entries-1] == ULONG_MAX) - trace->nr_entries--; - trace->max_entries = trace->nr_entries; nr_stack_trace_entries += trace->nr_entries; -- GitLab From b8ca7ff7731f57b256fcc13a9b7d4913f5282e5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:05 +0200 Subject: [PATCH 1085/1861] mm/slub: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. Remove the cruft. While at it remove the pointless loop of clearing the stack array completely. It's sufficient to clear the last entry as the consumers break out on the first zeroed entry anyway. 
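In code, the writer now terminates the array with a single zero and readers stop there (the reader loop is a schematic consumer, not a specific one):

	/* writer: only needed when the trace is shorter than the array */
	if (trace.nr_entries < TRACK_ADDRS_COUNT)
		p->addrs[trace.nr_entries] = 0;

	/* reader: breaks out on the first zeroed entry */
	for (i = 0; i < TRACK_ADDRS_COUNT; i++) {
		if (!p->addrs[i])
			break;
		/* consume p->addrs[i] */
	}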
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Andrew Morton Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Christoph Lameter Link: https://lkml.kernel.org/r/20190410103644.574058244@linutronix.de --- mm/slub.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d30ede89f4a64..e2ccd12b6faa3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -553,7 +553,6 @@ static void set_track(struct kmem_cache *s, void *object, if (addr) { #ifdef CONFIG_STACKTRACE struct stack_trace trace; - int i; trace.nr_entries = 0; trace.max_entries = TRACK_ADDRS_COUNT; @@ -563,20 +562,16 @@ static void set_track(struct kmem_cache *s, void *object, save_stack_trace(&trace); metadata_access_disable(); - /* See rant in lockdep.c */ - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries - 1] == ULONG_MAX) - trace.nr_entries--; - - for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) - p->addrs[i] = 0; + if (trace.nr_entries < TRACK_ADDRS_COUNT) + p->addrs[trace.nr_entries] = 0; #endif p->addr = addr; p->cpu = smp_processor_id(); p->pid = current->pid; p->when = jiffies; - } else + } else { memset(p, 0, sizeof(struct track)); + } } static void init_tracking(struct kmem_cache *s, void *object) -- GitLab From 4621c9858f05ab08434221e3a15cc8098645ef2a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:06 +0200 Subject: [PATCH 1086/1861] mm/page_owner: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. Remove the cruft. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Michal Hocko Cc: linux-mm@kvack.org Cc: Mike Rapoport Cc: Andrew Morton Link: https://lkml.kernel.org/r/20190410103644.661974663@linutronix.de --- mm/page_owner.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index 925b6f44a444a..df277e6bc3c6a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -148,9 +148,6 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) depot_stack_handle_t handle; save_stack_trace(&trace); - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries-1] == ULONG_MAX) - trace.nr_entries--; /* * We need to check recursion here because our request to stackdepot -- GitLab From ead97a49ec3a3cb9b5133acbfed9a49b91ebf37c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:07 +0200 Subject: [PATCH 1087/1861] mm/kasan: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. Remove the cruft. 
Signed-off-by: Thomas Gleixner Acked-by: Dmitry Vyukov Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Link: https://lkml.kernel.org/r/20190410103644.750219625@linutronix.de --- mm/kasan/common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 80bbe62b16cd2..38e5f20a775ad 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -74,9 +74,6 @@ static inline depot_stack_handle_t save_stack(gfp_t flags) save_stack_trace(&trace); filter_irq_stacks(&trace); - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries-1] == ULONG_MAX) - trace.nr_entries--; return depot_save_stack(&trace, flags); } -- GitLab From accddc41b96915ab4e5d37796c6d17d70805999c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:08 +0200 Subject: [PATCH 1088/1861] latency_top: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. The consumer terminates on the first zero entry or at the number of entries, so no functional change. Remove the cruft. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Link: https://lkml.kernel.org/r/20190410103644.853527514@linutronix.de --- fs/proc/base.c | 3 +-- kernel/latencytop.c | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 6a803a0b75df4..5569f215fc54c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -489,10 +489,9 @@ static int lstats_show_proc(struct seq_file *m, void *v) lr->count, lr->time, lr->max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { unsigned long bt = lr->backtrace[q]; + if (!bt) break; - if (bt == ULONG_MAX) - break; seq_printf(m, " %ps", (void *)bt); } seq_putc(m, '\n'); diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 96b4179cee6a7..f5a90ab3c6b94 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -120,8 +120,8 @@ account_global_scheduler_latency(struct task_struct *tsk, break; } - /* 0 and ULONG_MAX entries mean end of backtrace: */ - if (record == 0 || record == ULONG_MAX) + /* 0 entry marks end of backtrace: */ + if (!record) break; } if (same) { @@ -210,8 +210,8 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) break; } - /* 0 and ULONG_MAX entries mean end of backtrace: */ - if (record == 0 || record == ULONG_MAX) + /* 0 entry is end of backtrace */ + if (!record) break; } if (same) { @@ -252,10 +252,10 @@ static int lstats_show(struct seq_file *m, void *v) lr->count, lr->time, lr->max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { unsigned long bt = lr->backtrace[q]; + if (!bt) break; - if (bt == ULONG_MAX) - break; + seq_printf(m, " %ps", (void *)bt); } seq_puts(m, "\n"); -- GitLab From fa49e2eac9aa8259e1ea540d1bd301448d5b735d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:09 +0200 Subject: [PATCH 1089/1861] drm: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. Remove the cruft. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Daniel Vetter Cc: Rodrigo Vivi Link: https://lkml.kernel.org/r/20190410103644.945059666@linutronix.de --- drivers/gpu/drm/drm_mm.c | 3 --- drivers/gpu/drm/i915/intel_runtime_pm.c | 4 ---- 2 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 2b4f373736c7e..69552777e13a4 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -113,9 +113,6 @@ static noinline void save_stack(struct drm_mm_node *node) }; save_stack_trace(&trace); - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries-1] == ULONG_MAX) - trace.nr_entries--; /* May be called under spinlock, so avoid sleeping */ node->stack = depot_save_stack(&trace, GFP_NOWAIT); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index a017a4232c0fa..1f8acbb332c9a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -67,10 +67,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void) }; save_stack_trace(&trace); - if (trace.nr_entries && - trace.entries[trace.nr_entries - 1] == ULONG_MAX) - trace.nr_entries--; - return depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN); } -- GitLab From 4285f2fcef8001ead0f1c9315ba50302cab68cda Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2019 12:28:10 +0200 Subject: [PATCH 1090/1861] tracing: Remove the ULONG_MAX stack trace hackery No architecture terminates the stack trace with ULONG_MAX anymore. As the code checks the number of entries stored anyway there is no point in keeping all that ULONG_MAX magic around. The histogram code zeroes the storage before saving the stack, so if the trace is shorter than the maximum number of entries it can terminate the print loop if a zero entry is detected. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Link: https://lkml.kernel.org/r/20190410103645.048761764@linutronix.de --- kernel/trace/trace_events_hist.c | 2 +- kernel/trace/trace_stack.c | 20 +++++--------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 795aa20383773..21ceae299f7eb 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5246,7 +5246,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m, unsigned int i; for (i = 0; i < max_entries; i++) { - if (stacktrace_entries[i] == ULONG_MAX) + if (!stacktrace_entries[i]) return; seq_printf(m, "%*c", 1 + spaces, ' '); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index eec648a0d673b..c6e54ff25caea 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -18,8 +18,7 @@ #include "trace.h" -static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = - { [0 ... 
(STACK_TRACE_ENTRIES)] = ULONG_MAX }; +static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES + 1]; unsigned stack_trace_index[STACK_TRACE_ENTRIES]; /* @@ -52,10 +51,7 @@ void stack_trace_print(void) stack_trace_max.nr_entries); for (i = 0; i < stack_trace_max.nr_entries; i++) { - if (stack_dump_trace[i] == ULONG_MAX) - break; - if (i+1 == stack_trace_max.nr_entries || - stack_dump_trace[i+1] == ULONG_MAX) + if (i + 1 == stack_trace_max.nr_entries) size = stack_trace_index[i]; else size = stack_trace_index[i] - stack_trace_index[i+1]; @@ -150,8 +146,6 @@ check_stack(unsigned long ip, unsigned long *stack) p = start; for (; p < top && i < stack_trace_max.nr_entries; p++) { - if (stack_dump_trace[i] == ULONG_MAX) - break; /* * The READ_ONCE_NOCHECK is used to let KASAN know that * this is not a stack-out-of-bounds error. @@ -183,8 +177,6 @@ check_stack(unsigned long ip, unsigned long *stack) } stack_trace_max.nr_entries = x; - for (; x < i; x++) - stack_dump_trace[x] = ULONG_MAX; if (task_stack_end_corrupted(current)) { stack_trace_print(); @@ -286,7 +278,7 @@ __next(struct seq_file *m, loff_t *pos) { long n = *pos - 1; - if (n >= stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX) + if (n >= stack_trace_max.nr_entries) return NULL; m->private = (void *)n; @@ -360,12 +352,10 @@ static int t_show(struct seq_file *m, void *v) i = *(long *)v; - if (i >= stack_trace_max.nr_entries || - stack_dump_trace[i] == ULONG_MAX) + if (i >= stack_trace_max.nr_entries) return 0; - if (i+1 == stack_trace_max.nr_entries || - stack_dump_trace[i+1] == ULONG_MAX) + if (i + 1 == stack_trace_max.nr_entries) size = stack_trace_index[i]; else size = stack_trace_index[i] - stack_trace_index[i+1]; -- GitLab From c543cb4a5f07e09237ec0fc2c60c9f131b2c79ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 13 Apr 2019 17:32:21 -0700 Subject: [PATCH 1091/1861] ipv4: ensure rcu_read_lock() in ipv4_link_failure() fib_compute_spec_dst() needs to be called under rcu protection. syzbot reported : WARNING: suspicious RCU usage 5.1.0-rc4+ #165 Not tainted include/linux/inetdevice.h:220 suspicious rcu_dereference_check() usage! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by swapper/0/0: #0: 0000000051b67925 ((&n->timer)){+.-.}, at: lockdep_copy_map include/linux/lockdep.h:170 [inline] #0: 0000000051b67925 ((&n->timer)){+.-.}, at: call_timer_fn+0xda/0x720 kernel/time/timer.c:1315 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.1.0-rc4+ #165 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5162 __in_dev_get_rcu include/linux/inetdevice.h:220 [inline] fib_compute_spec_dst+0xbbd/0x1030 net/ipv4/fib_frontend.c:294 spec_dst_fill net/ipv4/ip_options.c:245 [inline] __ip_options_compile+0x15a7/0x1a10 net/ipv4/ip_options.c:343 ipv4_link_failure+0x172/0x400 net/ipv4/route.c:1195 dst_link_failure include/net/dst.h:427 [inline] arp_error_report+0xd1/0x1c0 net/ipv4/arp.c:297 neigh_invalidate+0x24b/0x570 net/core/neighbour.c:995 neigh_timer_handler+0xc35/0xf30 net/core/neighbour.c:1081 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 invoke_softirq kernel/softirq.c:374 [inline] irq_exit+0x180/0x1d0 kernel/softirq.c:414 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x14a/0x570 arch/x86/kernel/apic/apic.c:1062 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807 Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Stephen Suryaputra Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0206789bc2b73..88ce038dd495d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1185,14 +1185,20 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_link_failure(struct sk_buff *skb) { - struct rtable *rt; struct ip_options opt; + struct rtable *rt; + int res; /* Recompile ip options since IPCB may not be valid anymore. */ memset(&opt, 0, sizeof(opt)); opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), &opt, skb, NULL)) + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); -- GitLab From 9ac6bb1414ac0d45fe9cefbd1f5b06f0e1a3c98a Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:05 +0300 Subject: [PATCH 1092/1861] qed: Delete redundant doorbell recovery types DB_REC_DRY_RUN (running doorbell recovery without sending doorbells) is never used. DB_REC_ONCE (send a single doorbell from the doorbell recovery) is not needed anymore because by running the periodic handler we make sure we check the overflow status later instead. This patch is needed because in the next patches, the only doorbell recovery type being used is DB_REC_REAL_DEAL, and the fixes are much cleaner without this enum. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 3 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 69 +++++++++-------------- drivers/net/ethernet/qlogic/qed/qed_int.c | 6 +- drivers/net/ethernet/qlogic/qed/qed_int.h | 4 +- 4 files changed, 31 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 43a57ec296fd9..1514ec93b989c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -920,8 +920,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* doorbell recovery mechanism */ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn); -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec); +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); bool qed_edpm_enabled(struct qed_hwfn *p_hwfn); /* Other Linux specific common definitions */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9df8c4b3b54e3..da9df81d651d3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -300,26 +300,19 @@ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn) /* Ring the doorbell of a single doorbell recovery entry */ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, - struct qed_db_recovery_entry *db_entry, - enum qed_db_rec_exec db_exec) -{ - if (db_exec != DB_REC_ONCE) { - /* Print according to width */ - if (db_entry->db_width == DB_REC_WIDTH_32B) { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %x\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u32 *)db_entry->db_data); - } else { - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, - "%s doorbell address %p data %llx\n", - db_exec == DB_REC_DRY_RUN ? - "would have rung" : "ringing", - db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + struct qed_db_recovery_entry *db_entry) +{ + /* Print according to width */ + if (db_entry->db_width == DB_REC_WIDTH_32B) { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %x\n", + db_entry->db_addr, + *(u32 *)db_entry->db_data); + } else { + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "ringing doorbell address %p data %llx\n", + db_entry->db_addr, + *(u64 *)(db_entry->db_data)); } /* Sanity */ @@ -334,14 +327,12 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, wmb(); /* Ring the doorbell */ - if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) { - if (db_entry->db_width == DB_REC_WIDTH_32B) - DIRECT_REG_WR(db_entry->db_addr, - *(u32 *)(db_entry->db_data)); - else - DIRECT_REG_WR64(db_entry->db_addr, - *(u64 *)(db_entry->db_data)); - } + if (db_entry->db_width == DB_REC_WIDTH_32B) + DIRECT_REG_WR(db_entry->db_addr, + *(u32 *)(db_entry->db_data)); + else + DIRECT_REG_WR64(db_entry->db_addr, + *(u64 *)(db_entry->db_data)); /* Flush the write combined buffer. Next doorbell may come from a * different entity to the same address... @@ -350,29 +341,21 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, } /* Traverse the doorbell recovery entry list and ring all the doorbells */ -void qed_db_recovery_execute(struct qed_hwfn *p_hwfn, - enum qed_db_rec_exec db_exec) +void qed_db_recovery_execute(struct qed_hwfn *p_hwfn) { struct qed_db_recovery_entry *db_entry = NULL; - if (db_exec != DB_REC_ONCE) { - DP_NOTICE(p_hwfn, - "Executing doorbell recovery. Counter was %d\n", - p_hwfn->db_recovery_info.db_recovery_counter); + DP_NOTICE(p_hwfn, "Executing doorbell recovery. 
Counter was %d\n", + p_hwfn->db_recovery_info.db_recovery_counter); - /* Track amount of times recovery was executed */ - p_hwfn->db_recovery_info.db_recovery_counter++; - } + /* Track amount of times recovery was executed */ + p_hwfn->db_recovery_info.db_recovery_counter++; /* Protect the list */ spin_lock_bh(&p_hwfn->db_recovery_info.lock); list_for_each_entry(db_entry, - &p_hwfn->db_recovery_info.list, list_entry) { - qed_db_recovery_ring(p_hwfn, db_entry, db_exec); - if (db_exec == DB_REC_ONCE) - break; - } - + &p_hwfn->db_recovery_info.list, list_entry) + qed_db_recovery_ring(p_hwfn, db_entry); spin_unlock_bh(&p_hwfn->db_recovery_info.lock); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index e23980e301b6a..3546e253c75f3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -411,10 +411,8 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) { - qed_db_recovery_execute(p_hwfn, DB_REC_ONCE); + if (!overflow) return 0; - } if (qed_edpm_enabled(p_hwfn)) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); @@ -429,7 +427,7 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); /* Repeat all last doorbells (doorbell drop recovery) */ - qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL); + qed_db_recovery_execute(p_hwfn); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index 1f356ed4f761e..d473b522afc51 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -192,8 +192,8 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev); /** * @brief - Doorbell Recovery handler. - * Run DB_REAL_DEAL doorbell recovery in case of PF overflow - * (and flush DORQ if needed), otherwise run DB_REC_ONCE. + * Run doorbell recovery in case of PF overflow (and flush DORQ if + * needed). * * @param p_hwfn * @param p_ptt -- GitLab From b61b04ad81d5f975349d66abbecabf96ba211140 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:06 +0300 Subject: [PATCH 1093/1861] qed: Fix the doorbell address sanity check Fix the condition which verifies that doorbell address is inside the doorbell bar by checking that the end of the address is within range as well. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index da9df81d651d3..866cdc86a3f27 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -102,11 +102,15 @@ static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn, /* Doorbell address sanity (address within doorbell bar range) */ static bool qed_db_rec_sanity(struct qed_dev *cdev, - void __iomem *db_addr, void *db_data) + void __iomem *db_addr, + enum qed_db_rec_width db_width, + void *db_data) { + u32 width = (db_width == DB_REC_WIDTH_32B) ? 
32 : 64; + /* Make sure doorbell address is within the doorbell bar */ if (db_addr < cdev->doorbells || - (u8 __iomem *)db_addr > + (u8 __iomem *)db_addr + width > (u8 __iomem *)cdev->doorbells + cdev->db_size) { WARN(true, "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n", @@ -159,7 +163,7 @@ int qed_db_recovery_add(struct qed_dev *cdev, } /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) + if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data)) return -EINVAL; /* Obtain hwfn from doorbell address */ @@ -205,10 +209,6 @@ int qed_db_recovery_del(struct qed_dev *cdev, return 0; } - /* Sanitize doorbell address */ - if (!qed_db_rec_sanity(cdev, db_addr, db_data)) - return -EINVAL; - /* Obtain hwfn from doorbell address */ p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr); @@ -317,7 +317,7 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn, /* Sanity */ if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr, - db_entry->db_data)) + db_entry->db_width, db_entry->db_data)) return; /* Flush the write combined buffer. Since there are multiple doorbelling -- GitLab From d4476b8a6151b2dd86c09b5acec64f66430db55d Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:07 +0300 Subject: [PATCH 1094/1861] qed: Fix missing DORQ attentions When the DORQ (doorbell block) is overflowed, all PFs get attentions at the same time. If one PF finished handling the attention before another PF even started, the second PF might miss the DORQ's attention bit and not handle the attention at all. If the DORQ attention is missed and the issue is not resolved, another attention will not be sent, therefore each attention is treated as a potential DORQ attention. As a result, the attention callback is called more frequently so the debug print was moved to reduce its quantity. The number of periodic doorbell recovery handler schedules was reduced because it was the previous way to mitigating the missed attention issue. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_int.c | 20 ++++++++++++++++++-- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1514ec93b989c..fcc2d745c3759 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -436,6 +436,7 @@ struct qed_db_recovery_info { /* Lock to protect the doorbell recovery mechanism list */ spinlock_t lock; + bool dorq_attn; u32 db_recovery_counter; }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 3546e253c75f3..fe3286fc956b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -438,17 +438,19 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; int rc; - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); - DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + p_hwfn->db_recovery_info.dorq_attn = true; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. 
*/ + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; + DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts); + /* check if db_drop or overflow happened */ if (int_sts & (DORQ_REG_INT_STS_DB_DROP | DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) { @@ -505,6 +507,17 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) +{ + if (p_hwfn->db_recovery_info.dorq_attn) + goto out; + + /* Call DORQ callback if the attention was missed */ + qed_dorq_attn_cb(p_hwfn); +out: + p_hwfn->db_recovery_info.dorq_attn = false; +} + /* Instead of major changes to the data-structure, we have a some 'special' * identifiers for sources that changed meaning between adapters. */ @@ -1078,6 +1091,9 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, } } + /* Handle missed DORQ attention */ + qed_dorq_attn_handler(p_hwfn); + /* Clear IGU indication for the deasserted bits */ DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_IGU_CMD + diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f164d4acebcb4..6de23b56b2945 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -970,7 +970,7 @@ static void qed_update_pf_params(struct qed_dev *cdev, } } -#define QED_PERIODIC_DB_REC_COUNT 100 +#define QED_PERIODIC_DB_REC_COUNT 10 #define QED_PERIODIC_DB_REC_INTERVAL_MS 100 #define QED_PERIODIC_DB_REC_INTERVAL \ msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS) -- GitLab From 0d72c2ac89185f179da1e8a91c40c82f3fa38f0b Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Sun, 14 Apr 2019 17:23:08 +0300 Subject: [PATCH 1095/1861] qed: Fix the DORQ's attentions handling Separate the overflow handling from the hardware interrupt status analysis. The interrupt status is a single register and is common for all PFs. The first PF reading the register is not necessarily the one who overflowed. All PFs must check their overflow status on every attention. In this change we clear the sticky indication in the attention handler to allow doorbells to be processed again as soon as possible, but running the doorbell recovery is scheduled for the periodic handler to reduce the time spent in the attention handler. Checking the need for DORQ flush was changed to "db_bar_no_edpm" because qed_edpm_enabled()'s result could change dynamically and might have prevented a needed flush. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_int.c | 61 +++++++++++++++++------ 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index fcc2d745c3759..127c89b22ef0d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -431,6 +431,8 @@ struct qed_qm_info { u8 num_pf_rls; }; +#define QED_OVERFLOW_BIT 1 + struct qed_db_recovery_info { struct list_head list; @@ -438,6 +440,7 @@ struct qed_db_recovery_info { spinlock_t lock; bool dorq_attn; u32 db_recovery_counter; + unsigned long overflow; }; struct storm_stats { diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index fe3286fc956b5..8848d5bed6e5c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -378,6 +378,9 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, u32 count = QED_DB_REC_COUNT; u32 usage = 1; + /* Flush any pending (e)dpms as they may never arrive */ + qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); + /* wait for usage to zero or count to run out. This is necessary since * EDPM doorbell transactions can take multiple 64b cycles, and as such * can "split" over the pci. Possibly, the doorbell drop can happen with @@ -406,23 +409,24 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn, int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 overflow; + u32 attn_ovfl, cur_ovfl; int rc; - overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); - DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow); - if (!overflow) + attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT, + &p_hwfn->db_recovery_info.overflow); + cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!cur_ovfl && !attn_ovfl) return 0; - if (qed_edpm_enabled(p_hwfn)) { + DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n", + attn_ovfl, cur_ovfl); + + if (cur_ovfl && !p_hwfn->db_bar_no_edpm) { rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); if (rc) return rc; } - /* Flush any pending (e)dpm as they may never arrive */ - qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1); - /* Release overflow sticky indication (stop silently dropping everything) */ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); @@ -432,13 +436,35 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return 0; } -static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn) { - u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + u32 overflow; int rc; - p_hwfn->db_recovery_info.dorq_attn = true; + overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY); + if (!overflow) + goto out; + + /* Run PF doorbell recovery in next periodic handler */ + set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow); + + if (!p_hwfn->db_bar_no_edpm) { + rc = qed_db_rec_flush_queue(p_hwfn, p_ptt); + if (rc) + goto out; + } + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0); +out: + /* Schedule the handler even if overflow was not detected */ + qed_periodic_db_rec_start(p_hwfn); +} + +static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) +{ + u32 int_sts, first_drop_reason, details, address, all_drops_reason; + struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; /* int_sts may be zero since all PFs were interrupted for doorbell * overflow 
but another one already handled it. Can abort here. If @@ -477,11 +503,6 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4, first_drop_reason, all_drops_reason); - rc = qed_db_rec_handler(p_hwfn, p_ptt); - qed_periodic_db_rec_start(p_hwfn); - if (rc) - return rc; - /* Clear the doorbell drop details and prepare for next drop */ qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0); @@ -507,6 +528,14 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) return -EINVAL; } +static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) +{ + p_hwfn->db_recovery_info.dorq_attn = true; + qed_dorq_attn_overflow(p_hwfn); + + return qed_dorq_attn_int_sts(p_hwfn); +} + static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn) { if (p_hwfn->db_recovery_info.dorq_attn) -- GitLab From 2f5fb19341883bb6e37da351bc3700489d8506a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 19:51:06 +0200 Subject: [PATCH 1096/1861] x86/speculation: Prevent deadlock on ssb_state::lock Mikhail reported a lockdep splat related to the AMD specific ssb_state lock: CPU0 CPU1 lock(&st->lock); local_irq_disable(); lock(&(&sighand->siglock)->rlock); lock(&st->lock); lock(&(&sighand->siglock)->rlock); *** DEADLOCK *** The connection between sighand->siglock and st->lock comes through seccomp, which takes st->lock while holding sighand->siglock. Make sure interrupts are disabled when __speculation_ctrl_update() is invoked via prctl() -> speculation_ctrl_update(). Add a lockdep assert to catch future offenders. Fixes: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD") Reported-by: Mikhail Gavrilov Signed-off-by: Thomas Gleixner Tested-by: Mikhail Gavrilov Cc: Thomas Lendacky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1904141948200.4917@nanos.tec.linutronix.de --- arch/x86/kernel/process.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58ac7be52c7a6..957eae13b3700 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, u64 msr = x86_spec_ctrl_base; bool updmsr = false; + lockdep_assert_irqs_disabled(); + /* * If TIF_SSBD is different, select the proper mitigation * method. Note that if SSBD mitigation is disabled or permanentely @@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) void speculation_ctrl_update(unsigned long tif) { + unsigned long flags; + /* Forced update. 
Make sure all relevant TIF flags are different */ - preempt_disable(); + local_irq_save(flags); __speculation_ctrl_update(~tif, tif); - preempt_enable(); + local_irq_restore(flags); } /* Called from seccomp/prctl update */ -- GitLab From 69f23a09daf9790acb801aaef4bc7aea6f69eec1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 14 Apr 2019 11:02:05 -0700 Subject: [PATCH 1097/1861] rtnetlink: fix rtnl_valid_stats_req() nlmsg_len check Jakub forgot to either use nlmsg_len() or nlmsg_msg_size(), allowing KMSAN to detect a possible uninit-value in rtnl_stats_get BUG: KMSAN: uninit-value in rtnl_stats_get+0x6d9/0x11d0 net/core/rtnetlink.c:4997 CPU: 0 PID: 10428 Comm: syz-executor034 Not tainted 5.1.0-rc2+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x131/0x2a0 mm/kmsan/kmsan.c:619 __msan_warning+0x7a/0xf0 mm/kmsan/kmsan_instr.c:310 rtnl_stats_get+0x6d9/0x11d0 net/core/rtnetlink.c:4997 rtnetlink_rcv_msg+0x115b/0x1550 net/core/rtnetlink.c:5192 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2485 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5210 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg net/socket.c:632 [inline] ___sys_sendmsg+0xdb3/0x1220 net/socket.c:2137 __sys_sendmsg net/socket.c:2175 [inline] __do_sys_sendmsg net/socket.c:2184 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2182 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2182 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Fixes: 51bc860d4a99 ("rtnetlink: stats: validate attributes in get as well as dumps") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a51cab95ba64c..220c56e936592 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4948,7 +4948,7 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, { struct if_stats_msg *ifsm; - if (nlh->nlmsg_len < sizeof(*ifsm)) { + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } -- GitLab From dc4060a5dc2557e6b5aa813bf5b73677299d62d2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Apr 2019 15:17:41 -0700 Subject: [PATCH 1098/1861] Linux 5.1-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 15c8251d4d5ed..71fd5c2ce0675 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Shy Crocodile # *DOCUMENTATION* -- GitLab From c238bfe0be9ef7420f7669a69e27c8c8f4d8a568 Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 29 Mar 2019 13:23:15 -0400 Subject: [PATCH 1099/1861] drm/amd/display: If one stream full updates, full update all planes [Why] On some compositors, with two monitors attached, VT terminal switch can cause a graphical issue by the following means: There are two streams, one for each monitor. 
Each stream has one plane current state: M1:S1->P1 M2:S2->P2 The user calls for a terminal switch and a commit is made to change both planes to linear swizzle mode. In atomic check, a new dc_state is constructed with new planes on each stream new state: M1:S1->P3 M2:S2->P4 In commit tail, each stream is committed, one at a time. The first stream (S1) updates properly, triggerring a full update and replacing the state current state: M1:S1->P3 M2:S2->P4 The update for S2 comes in, but dc detects that there is no difference between the stream and plane in the new and current states, and so triggers a fast update. The fast update does not program swizzle, so the second monitor is corrupted [How] Add a flag to dc_plane_state that forces full updates When a stream undergoes a full update, set this flag on all changed planes, then clear it on the current stream Subsequent streams will get full updates as a result Signed-off-by: David Francis Signed-off-by: Nicholas Kazlauskas Reviewed-by: Roman Li Acked-by: Bhawanpreet Lakha Acked-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c68fbd55db3ca..a6cda201c964c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1377,6 +1377,11 @@ static enum surface_update_type det_surface_update(const struct dc *dc, return UPDATE_TYPE_FULL; } + if (u->surface->force_full_update) { + update_flags->bits.full_update = 1; + return UPDATE_TYPE_FULL; + } + type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type); @@ -1802,6 +1807,14 @@ void dc_commit_updates_for_stream(struct dc *dc, } dc_resource_state_copy_construct(state, context); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) + new_pipe->plane_state->force_full_update = true; + } } @@ -1838,6 +1851,12 @@ void dc_commit_updates_for_stream(struct dc *dc, dc->current_state = context; dc_release_state(old); + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->plane_state && pipe_ctx->stream == stream) + pipe_ctx->plane_state->force_full_update = false; + } } /*let's use current_state to update watermark etc*/ if (update_type >= UPDATE_TYPE_FULL) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1a7fd6aa77ebb..0515095574e73 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -503,6 +503,9 @@ struct dc_plane_state { struct dc_plane_status status; struct dc_context *ctx; + /* HACK: Workaround for forcing full reprogramming under some conditions */ + bool force_full_update; + /* private to dc_surface.c */ enum dc_irq_source irq_source; struct kref refcount; -- GitLab From 3c79107631db1f7fd32cf3f7368e4672004a3010 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 Apr 2019 13:08:54 +0200 Subject: [PATCH 1100/1861] netfilter: ctnetlink: don't use conntrack/expect object addresses as id else, we leak the addresses to userspace via ctnetlink events and dumps. 
Compute an ID on demand based on the immutable parts of nf_conn struct. Another advantage compared to using an address is that there is no immediate re-use of the same ID in case the conntrack entry is freed and reallocated again immediately. Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID") Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 35 ++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5ee7b30b49172..d2bc733a2ef1e 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -316,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +u32 nf_ct_get_id(const struct nf_conn *ct); + static inline void nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a137d4e7f218c..3c48d44d6fff9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. 
the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66c596d287a5d..d7f61b0547c65 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -485,7 +486,9 @@ nla_put_failure: static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2692,6 +2696,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } -- GitLab From 33d1c018179d0a30c39cc5f1682b77867282694b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Apr 2019 08:26:52 +0300 Subject: [PATCH 1101/1861] netfilter: nf_tables: prevent shift wrap in nft_chain_parse_hook() I believe that "hook->num" can be up to UINT_MAX. Shifting more than 31 bits is undefined in C but in practice it would lead to shift wrapping.
That would lead to an array overflow in nf_tables_addchain(): ops->hook = hook.type->hooks[ops->hooknum]; Fixes: fe19c04ca137 ("netfilter: nf_tables: remove nhooks field from struct nft_af_info") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ef7772e976cc8..1606eaa5ae0da 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net, if (IS_ERR(type)) return PTR_ERR(type); } - if (!(type->hook_mask & (1 << hook->num))) + if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; if (type->type == NFT_CHAIN_T_NAT && -- GitLab From 5bdac418f33f60b07a34e01e722889140ee8fac9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Apr 2019 14:45:20 +0200 Subject: [PATCH 1102/1861] netfilter: nat: fix icmp id randomization Sven Auhagen reported that a 2nd ping request will fail if 'fully-random' mode is used. Reason is that if no proto information is given, min/max are both 0, so we set the icmp id to 0 instead of choosing a random value between 0 and 65535. Update the test case as well to catch this; without the fix it yields: [..] ERROR: cannot ping ns1 from ns2 with ip masquerade fully-random (attempt 2) ERROR: cannot ping ns1 from ns2 with ipv6 masquerade fully-random (attempt 2) ... because the 2nd ping clashes with the existing 'id 0' icmp conntrack and gets dropped. Fixes: 203f2e78200c27e ("netfilter: nat: remove l4proto->unique_tuple") Reported-by: Sven Auhagen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 11 ++++-- tools/testing/selftests/netfilter/nft_nat.sh | 36 +++++++++++++++----- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index af7dc65377584..000952719adfd 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_ICMPV6: /* id is same for either direction... */ keyptr = &tuple->src.u.icmp.id; - min = range->min_proto.icmp.id; - range_size = ntohs(range->max_proto.icmp.id) - - ntohs(range->min_proto.icmp.id) + 1; + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + min = 0; + range_size = 65536; + } else { + min = ntohs(range->min_proto.icmp.id); + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + } goto find_free_id; #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE) case IPPROTO_GRE: diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 8ec76681605cc..3194007cf8d1b 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -321,6 +321,7 @@ EOF test_masquerade6() { + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -354,13 +355,13 @@ ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags" lret=1 fi @@ -397,19 +398,26 @@ EOF fi done + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $?
-ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip6 nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2" return $lret } test_masquerade() { + local natflags=$1 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -417,7 +425,7 @@ test_masquerade() ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: canot ping ns1 from ns2" + echo "ERROR: cannot ping ns1 from ns2 $natflags" lret=1 fi @@ -443,13 +451,13 @@ ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags" lret=1 fi @@ -485,13 +493,19 @@ EOF fi done + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" + lret=1 + fi + ip netns exec ns0 nft flush chain ip nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2" return $lret } @@ -750,8 +764,12 @@ test_local_dnat test_local_dnat6 reset_counters -test_masquerade -test_masquerade6 +test_masquerade "" +test_masquerade6 "" + +reset_counters +test_masquerade "fully-random" +test_masquerade6 "fully-random" reset_counters test_redirect -- GitLab From 6b1f16ba730d4c0cda1247568c3a1bf4fa3a2f2f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 12 Apr 2019 11:04:50 +0200 Subject: [PATCH 1103/1861] s390/pkey: add one more argument space for debug feature entry The debug feature entries have been used with up to 5 arguments (including the pointer to the format string) but there was only space reserved for 4 arguments. So now the registration reserves space for 5 long values. This fixes a sometimes appearing weird value as the last value of a debug feature entry like this: ...
pkey_sec2protkey zcrypt_send_cprb (cardnr=10 domain=12) failed with errno -2143346254 Signed-off-by: Harald Freudenberger Reported-by: Christian Rund Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 3e85d665c5729..45eb0c14b8807 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -51,7 +51,8 @@ static debug_info_t *debug_info; static void __init pkey_debug_init(void) { - debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long)); + /* 5 arguments per dbf entry (including the format string ptr) */ + debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long)); debug_register_view(debug_info, &debug_sprintf_view); debug_set_level(debug_info, 3); } -- GitLab From 2aae471d66c108b78493be1147e707bca6331e50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Mar 2019 15:51:56 +0100 Subject: [PATCH 1104/1861] drivers: power: supply: goldfish_battery: Fix bogus SPDX identifier spdxcheck.py complains: drivers/power/supply/goldfish_battery.c: 1:28 Invalid License ID: GPL which is correct because GPL is not a valid identifier. Of course this could have been caught by checkpatch.pl _before_ submitting or merging the patch. WARNING: 'SPDX-License-Identifier: GPL' is not supported in LICENSES/... #19: FILE: drivers/power/supply/goldfish_battery.c:1: +// SPDX-License-Identifier: GPL Which is absolutely hillarious as the commit introducing this wreckage says in the changelog: There was a checkpatch complain: "Missing or malformed SPDX-License-Identifier tag". Oh well. Replacing a checkpatch warning by a different checkpatch warning is a really useful exercise. Use the proper GPL-2.0 identifier which is what the boiler plate in the file had originally. Fixes: e75e3a125b40 ("drivers: power: supply: goldfish_battery: Put an SPDX tag") Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/power/supply/goldfish_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/goldfish_battery.c b/drivers/power/supply/goldfish_battery.c index ad969d9fc9815..c2644a9fe80f1 100644 --- a/drivers/power/supply/goldfish_battery.c +++ b/drivers/power/supply/goldfish_battery.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL +// SPDX-License-Identifier: GPL-2.0 /* * Power supply driver for the goldfish emulator * -- GitLab From cfd32acf7875d9dd83f82e1940098e88abeea439 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 12 Apr 2019 19:55:41 -0700 Subject: [PATCH 1105/1861] KVM: x86/mmu: Fix an inverted list_empty() check when zapping sptes A recently introduced helper for handling zap vs. remote flush incorrectly bails early, effectively leaking defunct shadow pages. Manifests as a slab BUG when exiting KVM due to the shadow pages being alive when their associated cache is destroyed. ========================================================================== BUG kvm_mmu_page_header: Objects remaining in kvm_mmu_page_header on ... -------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Slab 0x00000000fc436387 objects=26 used=23 fp=0x00000000d023caee ... CPU: 6 PID: 4315 Comm: rmmod Tainted: G B 5.1.0-rc2+ #19 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack+0x46/0x5b slab_err+0xad/0xd0 ? on_each_cpu_mask+0x3c/0x50 ? ksm_migrate_page+0x60/0x60 ? 
on_each_cpu_cond_mask+0x7c/0xa0 ? __kmalloc+0x1ca/0x1e0 __kmem_cache_shutdown+0x13a/0x310 shutdown_cache+0xf/0x130 kmem_cache_destroy+0x1d5/0x200 kvm_mmu_module_exit+0xa/0x30 [kvm] kvm_arch_exit+0x45/0x60 [kvm] kvm_exit+0x6f/0x80 [kvm] vmx_exit+0x1a/0x50 [kvm_intel] __x64_sys_delete_module+0x153/0x1f0 ? exit_to_usermode_loop+0x88/0xc0 do_syscall_64+0x4f/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: a21136345cb6f ("KVM: x86/mmu: Split remote_flush+zap case out of kvm_mmu_flush_or_zap()") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eee455a8a612d..85f7537289534 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2238,7 +2238,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, struct list_head *invalid_list, bool remote_flush) { - if (!remote_flush && !list_empty(invalid_list)) + if (!remote_flush && list_empty(invalid_list)) return false; if (!list_empty(invalid_list)) -- GitLab From 39036cd2727395c3369b1051005da74059a85317 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 Feb 2019 13:59:19 +0100 Subject: [PATCH 1106/1861] arch: add pidfd and io_uring syscalls everywhere Add the io_uring and pidfd_send_signal system calls to all architectures. These system calls are designed to handle both native and compat tasks, so all entries are the same across architectures, only arm-compat and the generic tale still use an old format. Acked-by: Michael Ellerman (powerpc) Acked-by: Heiko Carstens (s390) Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 4 ++++ arch/arm/tools/syscall.tbl | 4 ++++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 ++++++++ arch/ia64/kernel/syscalls/syscall.tbl | 4 ++++ arch/m68k/kernel/syscalls/syscall.tbl | 4 ++++ arch/microblaze/kernel/syscalls/syscall.tbl | 4 ++++ arch/mips/kernel/syscalls/syscall_n32.tbl | 4 ++++ arch/mips/kernel/syscalls/syscall_n64.tbl | 4 ++++ arch/mips/kernel/syscalls/syscall_o32.tbl | 4 ++++ arch/parisc/kernel/syscalls/syscall.tbl | 4 ++++ arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++++ arch/s390/kernel/syscalls/syscall.tbl | 4 ++++ arch/sh/kernel/syscalls/syscall.tbl | 4 ++++ arch/sparc/kernel/syscalls/syscall.tbl | 4 ++++ arch/xtensa/kernel/syscalls/syscall.tbl | 4 ++++ 16 files changed, 65 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 63ed39cbd3bd1..165f268beafc4 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -463,3 +463,7 @@ 532 common getppid sys_getppid # all other architectures have common numbers for new syscall, alpha # is the exception. 
+534 common pidfd_send_signal sys_pidfd_send_signal +535 common io_uring_setup sys_io_uring_setup +536 common io_uring_enter sys_io_uring_enter +537 common io_uring_register sys_io_uring_register diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 9016f4081bb9c..0393917eaa57a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -437,3 +437,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index d1dd93436e1ee..f2a83ff6b73c2 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 424 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 5590f26236907..23f1a44acada4 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) __SYSCALL(__NR_futex_time64, sys_futex) #define __NR_sched_rr_get_interval_time64 423 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ab9cda5f6136a..56e3d0b685e19 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -344,3 +344,7 @@ 332 common pkey_free sys_pkey_free 333 common rseq sys_rseq # 334 through 423 are reserved to sync up with other architectures +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 125c14178979c..df4ec3ec71d15 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -423,3 +423,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 8ee3a8c18498e..4964947732af3 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -429,3 +429,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common 
futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 15f4117900ee8..9392dfe33f97e 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -362,3 +362,7 @@ 421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64 422 n32 futex_time64 sys_futex 423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 n32 pidfd_send_signal sys_pidfd_send_signal +425 n32 io_uring_setup sys_io_uring_setup +426 n32 io_uring_enter sys_io_uring_enter +427 n32 io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index c85502e67b441..cd0c8aa21fbac 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -338,3 +338,7 @@ 327 n64 rseq sys_rseq 328 n64 io_pgetevents sys_io_pgetevents # 329 through 423 are reserved to sync up with other architectures +424 n64 pidfd_send_signal sys_pidfd_send_signal +425 n64 io_uring_setup sys_io_uring_setup +426 n64 io_uring_enter sys_io_uring_enter +427 n64 io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 2e063d0f837e7..e849e8ffe4a25 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -411,3 +411,7 @@ 421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 o32 futex_time64 sys_futex sys_futex 423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 o32 pidfd_send_signal sys_pidfd_send_signal +425 o32 io_uring_setup sys_io_uring_setup +426 o32 io_uring_enter sys_io_uring_enter +427 o32 io_uring_register sys_io_uring_register diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b26766c6647dc..fe8ca623add89 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -420,3 +420,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b18abb0c3dae6..00f5a63c8d9a6 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -505,3 +505,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 02579f95f391b..061418f787c37 
100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -426,3 +426,7 @@ 421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 - sys_futex 423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index bfda678576e43..480b057556ee4 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -426,3 +426,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9a5a04b2d2c5..a1dd24307b001 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -469,3 +469,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 6af49929de857..30084eaf84227 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -394,3 +394,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register -- GitLab From 5aae7832d1b4ec614996ea0f4fafc4d9855ec0b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Mar 2019 16:49:02 +0200 Subject: [PATCH 1107/1861] drm/i915: Do not enable FEC without DSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we enable FEC even when DSC is no used. While that is theoretically valid supposedly there isn't much of a benefit from this. But more importantly we do not account for the FEC link bandwidth overhead (2.4%) in the non-DSC link bandwidth computations. So the code may think we have enough bandwidth when we in fact do not. 
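[Illustrative aside, not part of the original commit: the unaccounted overhead is easy to see with rough numbers. The sketch below is plain userspace C, not the i915 code; the link and mode rates, the helper names, and the exact ratio handling are all hypothetical. It only shows how a mode that "fits" the raw link rate can stop fitting once roughly 2.4% of the link is reserved for FEC symbols.

#include <stdbool.h>
#include <stdio.h>

/* With DP FEC enabled, only about 97.6% of the raw link rate carries data. */
#define FEC_USABLE_NUM 976
#define FEC_USABLE_DEN 1000

static unsigned long long usable_rate(unsigned long long raw_kbps, bool fec)
{
	return fec ? raw_kbps / FEC_USABLE_DEN * FEC_USABLE_NUM : raw_kbps;
}

int main(void)
{
	unsigned long long link_kbps = 8100000ULL; /* hypothetical raw link rate */
	unsigned long long mode_kbps = 8000000ULL; /* hypothetical mode requirement */

	/* Ignoring FEC: 8000000 <= 8100000, so the mode is accepted. */
	printf("no FEC accounting: %s\n",
	       mode_kbps <= usable_rate(link_kbps, false) ? "fits" : "too big");
	/* Charging FEC: 8000000 > 7905600, so the mode must be rejected. */
	printf("with FEC overhead: %s\n",
	       mode_kbps <= usable_rate(link_kbps, true) ? "fits" : "too big");
	return 0;
}
]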
Cc: stable@vger.kernel.org Cc: Anusha Srivatsa Cc: Manasi Navare Fixes: 240999cf339f ("i915/dp/fec: Add fec_enable to the crtc state.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190326144903.6617-1-ville.syrjala@linux.intel.com Reviewed-by: Manasi Navare (cherry picked from commit 6fd3134ae3551d4802a04669c0f39f2f5c56f77d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8891f29a8c7ff..48da4a969a0a9 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1886,6 +1886,9 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, int pipe_bpp; int ret; + pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && + intel_dp_supports_fec(intel_dp, pipe_config); + if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return -EINVAL; @@ -2116,9 +2119,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; - pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) && - intel_dp_supports_fec(intel_dp, pipe_config); - ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); if (ret < 0) return ret; -- GitLab From f5c58ba18ab8ea2169670ed880e4d31ed772ad10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 11 Apr 2019 19:49:25 +0300 Subject: [PATCH 1108/1861] drm/i915: Restore correct bxt_ddi_phy_calc_lane_lat_optim_mask() calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are no longer calling bxt_ddi_phy_calc_lane_lat_optim_mask() when intel{hdmi,dp}_compute_config() succeeds, and instead only call it when those fail. This is fallout from the bool->int .compute_config() conversion which failed to invert the return value check before calling bxt_ddi_phy_calc_lane_lat_optim_mask(). Let's just replace it with an early bailout so that it's harder to miss. This restores the correct latency optim setting calculation (which could fix some real failures), and avoids the MISSING_CASE() from bxt_ddi_phy_calc_lane_lat_optim_mask() after intel{hdmi,dp}_compute_config() has failed. 
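[Illustrative aside, not part of the original commit: the regression pattern described above is the classic hazard of converting a function from "returns true on success" to "returns 0 on success, negative errno on failure" without inverting every caller's check. A minimal standalone C sketch with hypothetical function names, not the driver code:

#include <stdbool.h>

static bool compute_config_old(void) { return true; } /* old: true means success */
static int compute_config_new(void) { return 0; }     /* new: 0 means success */

static int lat_optim_calculated;

static void caller_before_conversion(void)
{
	if (compute_config_old())         /* follow-up work ran on success */
		lat_optim_calculated = 1;
}

static void caller_after_buggy_conversion(void)
{
	int ret = compute_config_new();
	if (ret)                          /* BUG: nonzero now means failure */
		lat_optim_calculated = 1; /* runs only when config failed */
}

static void caller_fixed(void)
{
	int ret = compute_config_new();
	if (ret)
		return;                   /* early bailout is harder to miss */
	lat_optim_calculated = 1;         /* success-only work */
}

The commit's fix takes the last shape: bail out on error first, so the success-only work cannot end up attached to the wrong branch.]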
Cc: Lyude Paul Fixes: 204474a6b859 ("drm/i915: Pass down rc in intel_encoder->compute_config()") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109373 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190411164925.28491-1-ville.syrjala@linux.intel.com Reviewed-by: Lyude Paul (cherry picked from commit 7a412b8f60cd57ab7dcb72ab701fde2bf81752eb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ab4e60dfd6a34..98cea1f4b3bf0 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3862,14 +3862,16 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); else ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + if (ret) + return ret; - if (IS_GEN9_LP(dev_priv) && ret) + if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); - return ret; + return 0; } -- GitLab From b19062a567266ee1f10f6709325f766bbcc07d1c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Apr 2019 10:49:38 -0600 Subject: [PATCH 1109/1861] io_uring: fix possible deadlock between io_uring_{enter,register} If we have multiple threads, one doing io_uring_enter() while the other is doing io_uring_register(), we can run into a deadlock between the two. io_uring_register() must wait for existing users of the io_uring instance to exit. But it does so while holding the io_uring mutex. Callers of io_uring_enter() may need this mutex to make progress (and eventually exit). If we wait for users to exit in io_uring_register(), we can't do so with the io_uring mutex held without potentially risking a deadlock. Drop the io_uring mutex while waiting for existing callers to exit. This is safe and guaranteed to make forward progress, since we already killed the percpu ref before doing so. Hence later callers of io_uring_enter() will be rejected. Reported-by: syzbot+16dc03452dee970a0c3e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index f4ddb9d232414..b35300e4c9a79 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2929,11 +2929,23 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries, static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) + __releases(ctx->uring_lock) + __acquires(ctx->uring_lock) { int ret; percpu_ref_kill(&ctx->refs); + + /* + * Drop uring mutex before waiting for references to exit. If another + * thread is currently inside io_uring_enter() it might need to grab + * the uring_lock to make progress. If we hold it here across the drain + * wait, then we can deadlock. It's safe to drop the mutex here, since + * no new references will come in after we've killed the percpu ref. 
+ */ + mutex_unlock(&ctx->uring_lock); wait_for_completion(&ctx->ctx_done); + mutex_lock(&ctx->uring_lock); switch (opcode) { case IORING_REGISTER_BUFFERS: -- GitLab From 79b4a9cf0e2ea8203ce777c8d5cfa86c71eae86e Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Tue, 9 Apr 2019 16:53:55 +0200 Subject: [PATCH 1110/1861] MIPS: scall64-o32: Fix indirect syscall number load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4c21b8fd8f14 (MIPS: seccomp: Handle indirect system calls (o32)) added indirect syscall detection for O32 processes running on MIPS64, but it did not work correctly for big endian kernels/processes. The reason is that the syscall number is loaded from ARG1 using the lw instruction while this is a 64-bit value, so zero is loaded instead of the syscall number. Fix the code by using the ld instruction instead. When running a 32-bit process on a 64-bit CPU, the values are properly sign-extended, so this ensures the value passed to syscall_trace_enter is correct. Recent systemd versions with seccomp enabled whitelist the getpid syscall for their internal processes (e.g. systemd-journald), but call it through syscall(SYS_getpid). This fix therefore allows O32 big endian systems with a 64-bit kernel to run recent systemd versions. Signed-off-by: Aurelien Jarno Cc: # v3.15+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/scall64-o32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f158c5894a9a8..feb2653490dfe 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -125,7 +125,7 @@ trace_a_syscall: subu t1, v0, __NR_O32_Linux move a1, v0 bnez t1, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ .set pop 1: jal syscall_trace_enter -- GitLab From 8ed633b9baf9ec7d593ebb8e256312ff1c70ab37 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 12 Apr 2019 16:36:33 -0400 Subject: [PATCH 1111/1861] Revert "net-sysfs: Fix memory leak in netdev_register_kobject" This reverts commit 6b70fc94afd165342876e53fc4b2f7d085009945. The reverted bugfix will cause another issue. Reported by syzbot+6024817a931b2830bc93@syzkaller.appspotmail.com. See https://syzkaller.appspot.com/x/log.txt?x=1737671b200000 for details. Signed-off-by: Wang Hai Acked-by: Andy Shevchenko Signed-off-by: David S.
Miller --- net/core/net-sysfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f8f94303a1f57..8f8b7b6c2945a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1747,20 +1747,16 @@ int netdev_register_kobject(struct net_device *ndev) error = device_add(dev); if (error) - goto error_put_device; + return error; error = register_queue_kobjects(ndev); - if (error) - goto error_device_del; + if (error) { + device_del(dev); + return error; + } pm_runtime_set_memalloc_noio(dev, true); - return 0; - -error_device_del: - device_del(dev); -error_put_device: - put_device(dev); return error; } -- GitLab From 92480b3977fd3884649d404cbbaf839b70035699 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 12 Apr 2019 15:04:10 +0200 Subject: [PATCH 1112/1861] bonding: fix event handling for stacked bonds When a bond is enslaved to another bond, bond_netdev_event() only handles the event as if the bond is a master, and skips treating the bond as a slave. This leads to a refcount leak on the slave, since we don't remove the adjacency to its master and the master holds a reference on the slave. Reproducer: ip link add bondL type bond ip link add bondU type bond ip link set bondL master bondU ip link del bondL No "Fixes:" tag, this code is older than git history. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b59708c35fafe..ee610721098e6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3213,8 +3213,12 @@ static int bond_netdev_event(struct notifier_block *this, return NOTIFY_DONE; if (event_dev->flags & IFF_MASTER) { + int ret; + netdev_dbg(event_dev, "IFF_MASTER\n"); - return bond_master_netdev_event(event, event_dev); + ret = bond_master_netdev_event(event, event_dev); + if (ret != NOTIFY_DONE) + return ret; } if (event_dev->flags & IFF_SLAVE) { -- GitLab From 789445b960c16baf626c050f762126189b45b82d Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 13 Apr 2019 09:52:15 +0200 Subject: [PATCH 1113/1861] MAINTAINERS: normalize Woojung Huh's email address MAINTAINERS contains a lower-case and upper-case variant of Woojung Huh' s email address. Only keep the lower-case variant in MAINTAINERS. Signed-off-by: Lukas Bulwahn Acked-by: Woojung Huh Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 27b0de13506c8..601679a213f6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10145,7 +10145,7 @@ F: drivers/spi/spi-at91-usart.c F: Documentation/devicetree/bindings/mfd/atmel-usart.txt MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER -M: Woojung Huh +M: Woojung Huh M: Microchip Linux Driver Support L: netdev@vger.kernel.org S: Maintained -- GitLab From 9c69a13205151c0d801de9f9d83a818e6e8f60ec Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Sun, 14 Apr 2019 14:21:29 -0700 Subject: [PATCH 1114/1861] route: Avoid crash from dereferencing NULL rt->from When __ip6_rt_update_pmtu() is called, rt->from is RCU dereferenced, but is never checked for null - rt6_flush_exceptions() may have removed the entry. 
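[ The oops below shows the crash; the fix, visible in the diff further down, is the usual re-check of an RCU-protected pointer, sketched here with the names from that diff: ]

	rcu_read_lock();
	from = rcu_dereference(rt6->from);
	if (!from) {			/* a flush already cleared the entry */
		rcu_read_unlock();
		return;
	}
	nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);	/* now safe to use */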
[ 1913.989004] RIP: 0010:ip6_rt_cache_alloc+0x13/0x170 [ 1914.209410] Call Trace: [ 1914.214798] [ 1914.219226] __ip6_rt_update_pmtu+0xb0/0x190 [ 1914.228649] ip6_tnl_xmit+0x2c2/0x970 [ip6_tunnel] [ 1914.239223] ? ip6_tnl_parse_tlv_enc_lim+0x32/0x1a0 [ip6_tunnel] [ 1914.252489] ? __gre6_xmit+0x148/0x530 [ip6_gre] [ 1914.262678] ip6gre_tunnel_xmit+0x17e/0x3c7 [ip6_gre] [ 1914.273831] dev_hard_start_xmit+0x8d/0x1f0 [ 1914.283061] sch_direct_xmit+0xfa/0x230 [ 1914.291521] __qdisc_run+0x154/0x4b0 [ 1914.299407] net_tx_action+0x10e/0x1f0 [ 1914.307678] __do_softirq+0xca/0x297 [ 1914.315567] irq_exit+0x96/0xa0 [ 1914.322494] smp_apic_timer_interrupt+0x68/0x130 [ 1914.332683] apic_timer_interrupt+0xf/0x20 [ 1914.341721] Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Jonathan Lemon Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0302e0eb07af1..7178e32eb15d0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2330,6 +2330,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); from = rcu_dereference(rt6->from); + if (!from) { + rcu_read_unlock(); + return; + } nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); -- GitLab From 614c70f35cd77a9af8e2ca841dcdb121cec3068f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 15 Apr 2019 16:47:03 +0100 Subject: [PATCH 1115/1861] bnx2x: fix spelling mistake "dicline" -> "decline" There is a spelling mistake in a BNX2X_ERR message, fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index a9bdc21873d32..10ff37d6dc783 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -957,7 +957,7 @@ int bnx2x_vfpf_update_vlan(struct bnx2x *bp, u16 vid, u8 vf_qid, bool add) bnx2x_sample_bulletin(bp); if (bp->shadow_bulletin.content.valid_bitmap & 1 << VLAN_VALID) { - BNX2X_ERR("Hypervisor will dicline the request, avoiding\n"); + BNX2X_ERR("Hypervisor will decline the request, avoiding\n"); rc = -EINVAL; goto out; } -- GitLab From 0d00a239f70fa52eb23837ffacf990f8404ecace Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 5 Dec 2018 13:50:22 -0600 Subject: [PATCH 1116/1861] PM / devfreq: Use of_node_name_eq for node name comparisons Convert string compares of DT node names to use of_node_name_eq helper instead. This removes direct access to the node name pointer. For instances using of_node_cmp, this has the side effect of now using case sensitive comparisons. This should not matter for any FDT based system which all of these are. 
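[ A before/after sketch of the conversion, based on the devfreq hunks below: ]

	/* before: direct access to the raw name pointer, case-insensitive */
	if (!of_node_cmp(node->name, edev->desc->name))
		goto out;

	/* after: the helper hides the pointer; the comparison becomes case
	 * sensitive, which is harmless for FDT-generated node names */
	if (of_node_name_eq(node, edev->desc->name))
		goto out;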
Cc: Chanwoo Choi Cc: MyungJoo Ham Cc: Kyungmin Park Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: linux-pm@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq-event.c | 2 +- drivers/devfreq/event/exynos-ppmu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index d67242d877443..87e93406d7cd9 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -240,7 +240,7 @@ struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(struct device *dev, } list_for_each_entry(edev, &devfreq_event_list, node) { - if (!strcmp(edev->desc->name, node->name)) + if (of_node_name_eq(node, edev->desc->name)) goto out; } edev = NULL; diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index c61de0bdf0532..c2ea949575015 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -529,7 +529,7 @@ static int of_get_devfreq_events(struct device_node *np, if (!ppmu_events[i].name) continue; - if (!of_node_cmp(node->name, ppmu_events[i].name)) + if (of_node_name_eq(node, ppmu_events[i].name)) break; } -- GitLab From a9487917ba6728dd618ae6d418a7f2197f1b4592 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 19 Jan 2019 11:04:54 -0500 Subject: [PATCH 1117/1861] PM / devfreq: fix mem leak in devfreq_add_device() 'devfreq' is malloced in devfreq_add_device() and should be freed in the error handling cases, otherwise it will cause memory leak. Signed-off-by: Yangtao Li Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0ae3de76833b7..fa1bdde89ffc9 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -651,7 +651,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_unlock(&devfreq->lock); err = set_freq_table(devfreq); if (err < 0) - goto err_out; + goto err_dev; mutex_lock(&devfreq->lock); } -- GitLab From 25846fa1cedada274b65ffd2413378290a60be47 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 19 Jan 2019 11:04:53 -0500 Subject: [PATCH 1118/1861] PM / devfreq: fix missing check of return value in devfreq_add_device() devm_kzalloc() could fail, so insert a check of its return value. And if it fails, returns -ENOMEM. 
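[ A sketch of the error handling this patch adds, condensed from the diff below; the lock taken earlier in probe is dropped before the unwind: ]

	devfreq->time_in_state = devm_kcalloc(&devfreq->dev,
					      devfreq->profile->max_state,
					      sizeof(unsigned long),
					      GFP_KERNEL);
	if (!devfreq->time_in_state) {
		mutex_unlock(&devfreq->lock);	/* drop the lock taken earlier */
		err = -ENOMEM;
		goto err_devfreq;	/* unwinds via devfreq_remove_device() */
	}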
Signed-off-by: Yangtao Li Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index fa1bdde89ffc9..4af608a61cd92 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -689,10 +689,22 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->profile->max_state, devfreq->profile->max_state), GFP_KERNEL); + if (!devfreq->trans_table) { + mutex_unlock(&devfreq->lock); + err = -ENOMEM; + goto err_devfreq; + } + devfreq->time_in_state = devm_kcalloc(&devfreq->dev, devfreq->profile->max_state, sizeof(unsigned long), GFP_KERNEL); + if (!devfreq->time_in_state) { + mutex_unlock(&devfreq->lock); + err = -ENOMEM; + goto err_devfreq; + } + devfreq->last_stat_updated = jiffies; srcu_init_notifier_head(&devfreq->transition_notifier_list); @@ -726,7 +738,7 @@ struct devfreq *devfreq_add_device(struct device *dev, err_init: mutex_unlock(&devfreq_list_lock); - +err_devfreq: devfreq_remove_device(devfreq); devfreq = NULL; err_dev: -- GitLab From 6d690f77932fe1f3ce5eb2de2c5ac16d33197608 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Mon, 21 Jan 2019 11:11:07 +0900 Subject: [PATCH 1119/1861] PM / devfreq: consistent indentation Following up with complaints on inconsistent indentation from Yangtao Li, this fixes indentation inconsistency. In principle, this tries to put arguments aligned to the left including the first argument except for the case where the first argument is on the far-right side. Signed-off-by: MyungJoo Ham Reviewed-by: Chanwoo Choi Acked-by: Yangtao Li --- drivers/devfreq/devfreq.c | 49 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 4af608a61cd92..428a1de81008b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -528,7 +528,7 @@ void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay) mutex_lock(&devfreq->lock); if (!devfreq->stop_polling) queue_delayed_work(devfreq_wq, &devfreq->work, - msecs_to_jiffies(devfreq->profile->polling_ms)); + msecs_to_jiffies(devfreq->profile->polling_ms)); } out: mutex_unlock(&devfreq->lock); @@ -537,7 +537,7 @@ EXPORT_SYMBOL(devfreq_interval_update); /** * devfreq_notifier_call() - Notify that the device frequency requirements - * has been changed out of devfreq framework. + * has been changed out of devfreq framework. 
* @nb: the notifier_block (supposed to be devfreq->nb) * @type: not used * @devp: not used @@ -683,12 +683,11 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_out; } - devfreq->trans_table = - devm_kzalloc(&devfreq->dev, - array3_size(sizeof(unsigned int), - devfreq->profile->max_state, - devfreq->profile->max_state), - GFP_KERNEL); + devfreq->trans_table = devm_kzalloc(&devfreq->dev, + array3_size(sizeof(unsigned int), + devfreq->profile->max_state, + devfreq->profile->max_state), + GFP_KERNEL); if (!devfreq->trans_table) { mutex_unlock(&devfreq->lock); err = -ENOMEM; @@ -696,9 +695,9 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->time_in_state = devm_kcalloc(&devfreq->dev, - devfreq->profile->max_state, - sizeof(unsigned long), - GFP_KERNEL); + devfreq->profile->max_state, + sizeof(unsigned long), + GFP_KERNEL); if (!devfreq->time_in_state) { mutex_unlock(&devfreq->lock); err = -ENOMEM; @@ -1184,7 +1183,7 @@ static ssize_t available_governors_show(struct device *d, */ if (df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, - "%s ", df->governor_name); + "%s ", df->governor_name); /* * The devfreq device shows the registered governor except for * immutable governors such as passive governor . @@ -1497,8 +1496,8 @@ EXPORT_SYMBOL(devfreq_recommended_opp); /** * devfreq_register_opp_notifier() - Helper function to get devfreq notified - * for any changes in the OPP availability - * changes + * for any changes in the OPP availability + * changes * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1510,8 +1509,8 @@ EXPORT_SYMBOL(devfreq_register_opp_notifier); /** * devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq - * notified for any changes in the OPP - * availability changes anymore. + * notified for any changes in the OPP + * availability changes anymore. * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. * @@ -1530,8 +1529,8 @@ static void devm_devfreq_opp_release(struct device *dev, void *res) } /** - * devm_ devfreq_register_opp_notifier() - * - Resource-managed devfreq_register_opp_notifier() + * devm_devfreq_register_opp_notifier() - Resource-managed + * devfreq_register_opp_notifier() * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1559,8 +1558,8 @@ int devm_devfreq_register_opp_notifier(struct device *dev, EXPORT_SYMBOL(devm_devfreq_register_opp_notifier); /** - * devm_devfreq_unregister_opp_notifier() - * - Resource-managed devfreq_unregister_opp_notifier() + * devm_devfreq_unregister_opp_notifier() - Resource-managed + * devfreq_unregister_opp_notifier() * @dev: The devfreq user device. (parent of devfreq) * @devfreq: The devfreq object. */ @@ -1579,8 +1578,8 @@ EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier); * @list: DEVFREQ_TRANSITION_NOTIFIER. */ int devfreq_register_notifier(struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct notifier_block *nb, + unsigned int list) { int ret = 0; @@ -1686,9 +1685,9 @@ EXPORT_SYMBOL(devm_devfreq_register_notifier); * @list: DEVFREQ_TRANSITION_NOTIFIER. 
*/ void devm_devfreq_unregister_notifier(struct device *dev, - struct devfreq *devfreq, - struct notifier_block *nb, - unsigned int list) + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) { WARN_ON(devres_release(dev, devm_devfreq_notifier_release, devm_devfreq_dev_match, devfreq)); -- GitLab From e2794d74f1ec6d31e662b147d55041bd00277278 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 10:18:24 -0500 Subject: [PATCH 1120/1861] PM / devfreq: rk3399_dmc: remove unneeded semicolon The semicolon is unneeded, so remove it. Signed-off-by: Yangtao Li Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/rk3399_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index e795ad2b3f6b8..a228dad2bee4e 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -322,7 +322,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->dmc_clk); - }; + } data->edev = devfreq_event_get_edev_by_phandle(dev, 0); if (IS_ERR(data->edev)) -- GitLab From 726409698feeac20de12fd107159573539b3e063 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 10:18:25 -0500 Subject: [PATCH 1121/1861] PM / devfreq: rockchip-dfi: remove unneeded semicolon The semicolon is unneeded, so remove it. Signed-off-by: Yangtao Li Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/rockchip-dfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 22b113363ffc6..fcbf76ebf55dd 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -211,7 +211,7 @@ static int rockchip_dfi_probe(struct platform_device *pdev) if (IS_ERR(data->clk)) { dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->clk); - }; + } /* try to find the optional reference to the pmu syscon */ node = of_parse_phandle(np, "rockchip,pmu", 0); -- GitLab From 1d1397c3ec1fd0bae330c376269a1c8c7e981a35 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 16 Feb 2019 10:18:26 -0500 Subject: [PATCH 1122/1861] PM / devfreq: tegra: remove unneeded variable This variable is not used after initialization, so remove it. And in order to unify the code style, move the location where the dev_get_drvdata is called by the way. 
Signed-off-by: Yangtao Li Reviewed-by: Chanwoo Choi Acked-by: Jon Hunter Signed-off-by: MyungJoo Ham --- drivers/devfreq/tegra-devfreq.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index c59d2eee5d309..c89ba7b834ffe 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -573,10 +573,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq, static int tegra_governor_event_handler(struct devfreq *devfreq, unsigned int event, void *data) { - struct tegra_devfreq *tegra; - int ret = 0; - - tegra = dev_get_drvdata(devfreq->dev.parent); + struct tegra_devfreq *tegra = dev_get_drvdata(devfreq->dev.parent); switch (event) { case DEVFREQ_GOV_START: @@ -600,7 +597,7 @@ static int tegra_governor_event_handler(struct devfreq *devfreq, break; } - return ret; + return 0; } static struct devfreq_governor tegra_devfreq_governor = { -- GitLab From bc658bef97a70094d4347faab7cabf2f5267d03f Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 11 Mar 2019 15:36:30 +0530 Subject: [PATCH 1123/1861] PM / devfreq: Restart previous governor if new governor fails to start If the new governor fails to start, switch back to old governor so that the devfreq state is not left in some weird limbo. [Myungjoo: assume fatal on revert failure and set df->governor to NULL] Signed-off-by: Sibi Sankar Signed-off-by: Saravana Kannan Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 428a1de81008b..08dfcdc2ac58e 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1124,7 +1124,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, struct devfreq *df = to_devfreq(dev); int ret; char str_governor[DEVFREQ_NAME_LEN + 1]; - struct devfreq_governor *governor; + const struct devfreq_governor *governor, *prev_governor; ret = sscanf(buf, "%" __stringify(DEVFREQ_NAME_LEN) "s", str_governor); if (ret != 1) @@ -1153,12 +1153,24 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, goto out; } } + prev_governor = df->governor; df->governor = governor; strncpy(df->governor_name, governor->name, DEVFREQ_NAME_LEN); ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); - if (ret) + if (ret) { dev_warn(dev, "%s: Governor %s not started(%d)\n", __func__, df->governor->name, ret); + df->governor = prev_governor; + strncpy(df->governor_name, prev_governor->name, + DEVFREQ_NAME_LEN); + ret = df->governor->event_handler(df, DEVFREQ_GOV_START, NULL); + if (ret) { + dev_err(dev, + "%s: reverting to Governor %s failed (%d)\n", + __func__, df->governor_name, ret); + df->governor = NULL; + } + } out: mutex_unlock(&devfreq_list_lock); -- GitLab From b53b0128052ffd687797d5f4deeb76327e7b5711 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 13 Mar 2019 13:22:53 +0100 Subject: [PATCH 1124/1861] PM / devfreq: Fix static checker warning in try_then_request_governor The patch 23c7b54ca1cd: "PM / devfreq: Fix devfreq_add_device() when drivers are built as modules." leads to the following static checker warning: drivers/devfreq/devfreq.c:1043 governor_store() warn: 'governor' can also be NULL The reason is that the try_then_request_governor() function returns both error pointers and NULL. 
It should just return error pointers, so fix this by returning an ERR_PTR to the error instead of returning NULL. Fixes: 23c7b54ca1cd ("PM / devfreq: Fix devfreq_add_device() when drivers are built as modules.") Reported-by: Dan Carpenter Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 08dfcdc2ac58e..8928383a1fa1f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -228,7 +228,7 @@ static struct devfreq_governor *find_devfreq_governor(const char *name) * if is not found. This can happen when both drivers (the governor driver * and the driver that call devfreq_add_device) are built as modules. * devfreq_list_lock should be held by the caller. Returns the matched - * governor's pointer. + * governor's pointer or an error pointer. */ static struct devfreq_governor *try_then_request_governor(const char *name) { @@ -254,7 +254,7 @@ static struct devfreq_governor *try_then_request_governor(const char *name) /* Restore previous state before return */ mutex_lock(&devfreq_list_lock); if (err) - return NULL; + return ERR_PTR(err); governor = find_devfreq_governor(name); } -- GitLab From fbb9c3c9a5acfdbcc21114de0ba16c7b488968f6 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 21 Mar 2019 10:01:10 +0100 Subject: [PATCH 1125/1861] PM / devfreq: exynos-bus: Suspend all devices on system shutdown Force all Exynos buses to safe operation points before doing the system reboot operation. There are boards on which some aggressive power saving operation points are beyond the capabilities of the bootloader to properly reset the hardware and boot the board. This way one can avoid a board crash early after reboot. This fixes the reboot issue on the OdroidU3 board with both eMMC and SD boot. Reported-by: Markus Reichl Signed-off-by: Marek Szyprowski Acked-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/exynos-bus.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index c25658b265988..486cc5b422f1d 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -514,6 +514,13 @@ err: return ret; } +static void exynos_bus_shutdown(struct platform_device *pdev) +{ + struct exynos_bus *bus = dev_get_drvdata(&pdev->dev); + + devfreq_suspend_device(bus->devfreq); +} + #ifdef CONFIG_PM_SLEEP static int exynos_bus_resume(struct device *dev) { @@ -556,6 +563,7 @@ MODULE_DEVICE_TABLE(of, exynos_bus_of_match); static struct platform_driver exynos_bus_platdrv = { .probe = exynos_bus_probe, + .shutdown = exynos_bus_shutdown, .driver = { .name = "exynos-bus", .pm = &exynos_bus_pm, -- GitLab From adfe3b76608ffe547af5a74415f15499b798f32a Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 21 Mar 2019 19:14:36 -0400 Subject: [PATCH 1126/1861] PM / devfreq: rockchip-dfi: Move GRF definitions to a common place. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some rk3399 GRF (Generic Register Files) definitions can be used for different drivers. Move these definitions to a common include so we don't need to duplicate these definitions.
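[ With the definitions centralized, a consumer decodes the DDR type like this; a sketch based on the rockchip-dfi hunk below: ]

	u32 val, ddr_type;

	regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val);
	ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) &
		   RK3399_PMUGRF_DDRTYPE_MASK;

	if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3)
		writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL);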
Signed-off-by: Enric Balletbo i Serra Acked-by: Chanwoo Choi Signed-off-by: Gaël PORTAY Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/rockchip-dfi.c | 23 +++++++---------------- include/soc/rockchip/rk3399_grf.h | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 include/soc/rockchip/rk3399_grf.h diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index fcbf76ebf55dd..a436ec4901bbe 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -26,6 +26,8 @@ #include #include +#include + #define RK3399_DMC_NUM_CH 2 /* DDRMON_CTRL */ @@ -43,18 +45,6 @@ #define DDRMON_CH1_COUNT_NUM 0x3c #define DDRMON_CH1_DFI_ACCESS_NUM 0x40 -/* pmu grf */ -#define PMUGRF_OS_REG2 0x308 -#define DDRTYPE_SHIFT 13 -#define DDRTYPE_MASK 7 - -enum { - DDR3 = 3, - LPDDR3 = 6, - LPDDR4 = 7, - UNUSED = 0xFF -}; - struct dmc_usage { u32 access; u32 total; @@ -83,16 +73,17 @@ static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) u32 ddr_type; /* get ddr type */ - regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val); - ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK; + regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; /* clear DDRMON_CTRL setting */ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL); /* set ddr type to dfi */ - if (ddr_type == LPDDR3) + if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3) writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); - else if (ddr_type == LPDDR4) + else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4) writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); /* enable count, use software mode */ diff --git a/include/soc/rockchip/rk3399_grf.h b/include/soc/rockchip/rk3399_grf.h new file mode 100644 index 0000000000000..3eebabcb28123 --- /dev/null +++ b/include/soc/rockchip/rk3399_grf.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Rockchip General Register Files definitions + * + * Copyright (c) 2018, Collabora Ltd. + * Author: Enric Balletbo i Serra + */ + +#ifndef __SOC_RK3399_GRF_H +#define __SOC_RK3399_GRF_H + +/* PMU GRF Registers */ +#define RK3399_PMUGRF_OS_REG2 0x308 +#define RK3399_PMUGRF_DDRTYPE_SHIFT 13 +#define RK3399_PMUGRF_DDRTYPE_MASK 7 +#define RK3399_PMUGRF_DDRTYPE_DDR3 3 +#define RK3399_PMUGRF_DDRTYPE_LPDDR2 5 +#define RK3399_PMUGRF_DDRTYPE_LPDDR3 6 +#define RK3399_PMUGRF_DDRTYPE_LPDDR4 7 + +#endif -- GitLab From 9173c5ceb035aab28171cd74dfddf27f47213b99 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 21 Mar 2019 19:14:38 -0400 Subject: [PATCH 1127/1861] PM / devfreq: rk3399_dmc: Pass ODT and auto power down parameters to TF-A. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trusted Firmware-A (TF-A) for rk3399 implements a SiP call to get the on-die termination (ODT) and auto power down parameters from kernel, this patch adds the functionality to do this. Also, if DDR clock frequency is lower than the on-die termination (ODT) disable frequency this driver should disable the DDR ODT. 
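[ The heart of the change, condensed from the diff below: pack the DT-provided power-down timings into two arguments and pass the ODT on/off decision to TF-A through the SiP call: ]

	data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) |
			    ((data->timing.sr_mc_gate_idle & 0xff) << 8) |
			    ((data->timing.standby_idle & 0xffff) << 16);
	data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) |
			    ((data->timing.srpd_lite_idle & 0xfff) << 16);

	/* on each frequency change: odt_enable = target_rate >= odt_dis_freq */
	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
		      dmcfreq->odt_pd_arg1,
		      ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
		      odt_enable, 0, 0, 0, &res);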
Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chanwoo Choi Signed-off-by: Gaël PORTAY Signed-off-by: MyungJoo Ham --- drivers/devfreq/rk3399_dmc.c | 71 ++++++++++++++++++++++++++++- include/soc/rockchip/rockchip_sip.h | 1 + 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index a228dad2bee4e..567c034d0301e 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -18,14 +18,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include struct dram_timing { @@ -69,8 +72,11 @@ struct rk3399_dmcfreq { struct mutex lock; struct dram_timing timing; struct regulator *vdd_center; + struct regmap *regmap_pmu; unsigned long rate, target_rate; unsigned long volt, target_volt; + unsigned int odt_dis_freq; + int odt_pd_arg0, odt_pd_arg1; }; static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, @@ -80,6 +86,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, struct dev_pm_opp *opp; unsigned long old_clk_rate = dmcfreq->rate; unsigned long target_volt, target_rate; + struct arm_smccc_res res; + bool odt_enable = false; int err; opp = devfreq_recommended_opp(dev, freq, flags); @@ -95,6 +103,19 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, mutex_lock(&dmcfreq->lock); + if (target_rate >= dmcfreq->odt_dis_freq) + odt_enable = true; + + /* + * This makes a SMC call to the TF-A to set the DDR PD (power-down) + * timings and to enable or disable the ODT (on-die termination) + * resistors. + */ + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, + dmcfreq->odt_pd_arg1, + ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, + odt_enable, 0, 0, 0, &res); + /* * If frequency scaling from low to high, adjust voltage first. * If frequency scaling from high to low, adjust frequency first. @@ -294,11 +315,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) { struct arm_smccc_res res; struct device *dev = &pdev->dev; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node, *node; struct rk3399_dmcfreq *data; int ret, index, size; uint32_t *timing; struct dev_pm_opp *opp; + u32 ddr_type; + u32 val; data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL); if (!data) @@ -354,10 +377,56 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) } } + node = of_parse_phandle(np, "rockchip,pmu", 0); + if (node) { + data->regmap_pmu = syscon_node_to_regmap(node); + if (IS_ERR(data->regmap_pmu)) + return PTR_ERR(data->regmap_pmu); + } + + regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; + + switch (ddr_type) { + case RK3399_PMUGRF_DDRTYPE_DDR3: + data->odt_dis_freq = data->timing.ddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR3: + data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR4: + data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq; + break; + default: + return -EINVAL; + }; + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT, 0, 0, 0, 0, &res); + /* + * In TF-A there is a platform SIP call to set the PD (power-down) + * timings and to enable or disable the ODT (on-die termination). 
+ * This call needs three arguments as follows: + * + * arg0: + * bit[0-7] : sr_idle + * bit[8-15] : sr_mc_gate_idle + * bit[16-31] : standby idle + * arg1: + * bit[0-11] : pd_idle + * bit[16-27] : srpd_lite_idle + * arg2: + * bit[0] : odt enable + */ + data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) | + ((data->timing.sr_mc_gate_idle & 0xff) << 8) | + ((data->timing.standby_idle & 0xffff) << 16); + data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) | + ((data->timing.srpd_lite_idle & 0xfff) << 16); + /* * We add a devfreq driver to our parent since it has a device tree node * with operating points. diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h index 7e28092c4d3db..ad9482c56797f 100644 --- a/include/soc/rockchip/rockchip_sip.h +++ b/include/soc/rockchip/rockchip_sip.h @@ -23,5 +23,6 @@ #define ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE 0x05 #define ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ 0x06 #define ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM 0x07 +#define ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD 0x08 #endif -- GitLab From 1be0730f1dcd2971db4d2fe5497a20f438b837a7 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 18 Feb 2019 19:21:08 +0100 Subject: [PATCH 1128/1861] trace: events: add devfreq trace event file The patch adds a new file for with trace events for devfreq framework. They are used for performance analysis of the framework. It also contains updates in MAINTAINERS file adding new entry for devfreq maintainers. Signed-off-by: Lukasz Luba Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- MAINTAINERS | 1 + include/trace/events/devfreq.h | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 include/trace/events/devfreq.h diff --git a/MAINTAINERS b/MAINTAINERS index 3671fdea5010e..27ed10966c817 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4552,6 +4552,7 @@ S: Maintained F: drivers/devfreq/ F: include/linux/devfreq.h F: Documentation/devicetree/bindings/devfreq/ +F: include/trace/events/devfreq.h DEVICE FREQUENCY EVENT (DEVFREQ-EVENT) M: Chanwoo Choi diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h new file mode 100644 index 0000000000000..cf5b8772175d9 --- /dev/null +++ b/include/trace/events/devfreq.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM devfreq + +#if !defined(_TRACE_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DEVFREQ_H + +#include +#include + +TRACE_EVENT(devfreq_monitor, + TP_PROTO(struct devfreq *devfreq), + + TP_ARGS(devfreq), + + TP_STRUCT__entry( + __field(unsigned long, freq) + __field(unsigned long, busy_time) + __field(unsigned long, total_time) + __field(unsigned int, polling_ms) + __string(dev_name, dev_name(&devfreq->dev)) + ), + + TP_fast_assign( + __entry->freq = devfreq->previous_freq; + __entry->busy_time = devfreq->last_status.busy_time; + __entry->total_time = devfreq->last_status.total_time; + __entry->polling_ms = devfreq->profile->polling_ms; + __assign_str(dev_name, dev_name(&devfreq->dev)); + ), + + TP_printk("dev_name=%s freq=%lu polling_ms=%u load=%lu", + __get_str(dev_name), __entry->freq, __entry->polling_ms, + __entry->total_time == 0 ? 
0 : + (100 * __entry->busy_time) / __entry->total_time) +); +#endif /* _TRACE_DEVFREQ_H */ + +/* This part must be outside protection */ +#include -- GitLab From cf451adfa392bd9ba36f31659dbe6a5010b46ef9 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 18 Feb 2019 19:21:09 +0100 Subject: [PATCH 1129/1861] PM / devfreq: add tracing for scheduling work This patch adds basic tracing of the devfreq workqueue and delayed work. It aims to capture changes of the polling intervals and device state. Signed-off-by: Lukasz Luba Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 8928383a1fa1f..6b6991f0e873f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -29,6 +29,9 @@ #include #include "governor.h" +#define CREATE_TRACE_POINTS +#include + static struct class *devfreq_class; /* @@ -394,6 +397,8 @@ static void devfreq_monitor(struct work_struct *work) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); mutex_unlock(&devfreq->lock); + + trace_devfreq_monitor(devfreq); } /** -- GitLab From be549d49115422f846b6d96ee8fd7173a5f7ceb0 Mon Sep 17 00:00:00 2001 From: Jaesoo Lee Date: Tue, 9 Apr 2019 17:02:22 -0700 Subject: [PATCH 1130/1861] scsi: core: set result when the command cannot be dispatched When SCSI blk-mq is enabled, there is a bug in handling errors in scsi_queue_rq. Specifically, the bug is that the result field of scsi_request is not set correctly when the dispatch of the command has failed. Since the upper layer code, including the sg_io ioctl, expects to receive any error status from the result field of scsi_request, the error is silently ignored and this could cause data corruption for some applications. Fixes: d285203cf647 ("scsi: add support for a blk-mq based I/O path.") Cc: Signed-off-by: Jaesoo Lee Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 601b9f1de2675..07dfc17d48246 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1706,8 +1706,12 @@ out_put_budget: ret = BLK_STS_DEV_RESOURCE; break; default: + if (unlikely(!scsi_device_online(sdev))) + scsi_req(req)->result = DID_NO_CONNECT << 16; + else + scsi_req(req)->result = DID_ERROR << 16; /* - * Make sure to release all allocated ressources when + * Make sure to release all allocated resources when * we hit an error, as we will never see this command * again. */ -- GitLab From 6a03469a1edc94da52b65478f1e00837add869a3 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 15 Apr 2019 09:49:56 -0700 Subject: [PATCH 1131/1861] x86/build/lto: Fix truncated .bss with -fdata-sections With CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y, we compile the kernel with -fdata-sections, which also splits the .bss section. The new sections get new .bss.* names, a pattern that is missed by the main x86 linker script, which only expects the '.bss' name. This results in the second part being discarded, leaving a too-small, truncated .bss section and an unhappy, non-working kernel. Use the common BSS_MAIN macro in the linker script to properly capture and merge all the generated BSS sections.
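[ For reference, a sketch of the BSS_MAIN idea as defined in include/asm-generic/vmlinux.lds.h; the exact pattern may differ between kernel versions: ]

	#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
	#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]*	/* also match split sections */
	#else
	#define BSS_MAIN .bss
	#endif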
Signed-off-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Borislav Petkov Cc: Kees Cook Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190415164956.124067-1-samitolvanen@google.com [ Extended the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bad8c51fee6ee..a5127b2c195f9 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -362,7 +362,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .; -- GitLab From 8b39adbee805c539a461dbf208b125b096152b1c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 15 Apr 2019 10:05:38 -0700 Subject: [PATCH 1132/1861] locking/lockdep: Make lockdep_unregister_key() honor 'debug_locks' again If lockdep_register_key() and lockdep_unregister_key() are called with debug_locks == false then the following warning is reported: WARNING: CPU: 2 PID: 15145 at kernel/locking/lockdep.c:4920 lockdep_unregister_key+0x1ad/0x240 That warning is reported because lockdep_unregister_key() ignores the value of 'debug_locks' and because the behavior of lockdep_register_key() depends on whether or not 'debug_locks' is set. Fix this inconsistency by making lockdep_unregister_key() take 'debug_locks' again into account. Signed-off-by: Bart Van Assche Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: shenghui Fixes: 90c1cba2b3b3 ("locking/lockdep: Zap lock classes even with lock debugging disabled") Link: http://lkml.kernel.org/r/20190415170538.23491-1-bvanassche@acm.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e16766ff184b5..e221be724fe82 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4907,8 +4907,9 @@ void lockdep_unregister_key(struct lock_class_key *key) return; raw_local_irq_save(flags); - arch_spin_lock(&lockdep_lock); - current->lockdep_recursion = 1; + if (!graph_lock()) + goto out_irq; + pf = get_pending_free(); hlist_for_each_entry_rcu(k, hash_head, hash_entry) { if (k == key) { @@ -4920,8 +4921,8 @@ void lockdep_unregister_key(struct lock_class_key *key) WARN_ON_ONCE(!found); __lockdep_free_key_range(pf, key, 1); call_rcu_zapped(pf); - current->lockdep_recursion = 0; - arch_spin_unlock(&lockdep_lock); + graph_unlock(); +out_irq: raw_local_irq_restore(flags); /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */ -- GitLab From 89502a01979033a1c0c0c4d6d9aef07e59021005 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Mon, 15 Apr 2019 17:08:53 +0200 Subject: [PATCH 1133/1861] x86/mm: Fix the 56-bit addresses memory map in Documentation/x86/x86_64/mm.txt This fixes a PT typo, and the following 56-bit address-space addresses: * the hole extends from 0100000000000000 to feffffffffffffff * the KASAN shadow memory area stops at fffffbffffffffff (see kasan.h) Signed-off-by: Stephen Kitt Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: H. 
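[ The restored pattern, condensed from the diff below: graph_lock() fails once debug_locks has been cleared, which gives lockdep_unregister_key() the missing check: ]

	raw_local_irq_save(flags);
	if (!graph_lock())		/* returns 0 when lockdep is disabled */
		goto out_irq;
	/* ... unhash the key and zap its lock classes ... */
	graph_unlock();
out_irq:
	raw_local_irq_restore(flags);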
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: alex.popov@linux.com Cc: bhe@redhat.com Cc: corbet@lwn.net Cc: kirill.shutemov@linux.intel.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20190415150853.10354-1-steve@sk2.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 804f9426ed17b..6cbe652d7a497 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -72,7 +72,7 @@ Complete virtual memory map with 5-level page tables Notes: - With 56-bit addresses, user-space memory gets expanded by a factor of 512x, - from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PT starting + from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PB starting offset and many of the regions expand to support the much larger physical memory supported. @@ -83,7 +83,7 @@ Notes: 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm __________________|____________|__________________|_________|___________________________________________________________ | | | | - 0000800000000000 | +64 PB | ffff7fffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical + 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical | | | | virtual memory addresses up to the -64 PB | | | | starting offset of kernel mappings. __________________|____________|__________________|_________|___________________________________________________________ @@ -99,7 +99,7 @@ ____________________________________________________________|___________________ ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base) ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole - ffdf000000000000 | -8.25 PB | fffffdffffffffff | ~8 PB | KASAN shadow memory + ffdf000000000000 | -8.25 PB | fffffbffffffffff | ~8 PB | KASAN shadow memory __________________|____________|__________________|_________|____________________________________________________________ | | Identical layout to the 47-bit one from here on: -- GitLab From 5f843ed415581cfad4ef8fefe31c138a8346ca8a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 15 Apr 2019 15:01:25 +0900 Subject: [PATCH 1134/1861] kprobes: Fix error check when reusing optimized probes The following commit introduced a bug in one of our error paths: 819319fc9346 ("kprobes: Return error if we fail to reuse kprobe instead of BUG_ON()") it missed to handle the return value of kprobe_optready() as error-value. In reality, the kprobe_optready() returns a bool result, so "true" case must be passed instead of 0. This causes some errors on kprobe boot-time selftests on ARM: [ ] Beginning kprobe tests... [ ] Probe ARM code [ ] kprobe [ ] kretprobe [ ] ARM instruction simulation [ ] Check decoding tables [ ] Run test cases [ ] FAIL: test_case_handler not run [ ] FAIL: Test andge r10, r11, r14, asr r7 [ ] FAIL: Scenario 11 ... [ ] FAIL: Scenario 7 [ ] Total instruction simulation tests=1631, pass=1433 fail=198 [ ] kprobe tests failed This can happen if an optimized probe is unregistered and next kprobe is registered on same address until the previous probe is not reclaimed. 
If this happens, a hidden aggregated probe may be kept in memory, and no new kprobe can probe same address. Also, in that case register_kprobe() will return "1" instead of minus error value, which can mislead caller logic. Signed-off-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Naveen N . Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v5.0+ Fixes: 819319fc9346 ("kprobes: Return error if we fail to reuse kprobe instead of BUG_ON()") Link: http://lkml.kernel.org/r/155530808559.32517.539898325433642204.stgit@devnote2 Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c83e547271312..b1ea30a5540e9 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) static int reuse_unused_kprobe(struct kprobe *ap) { struct optimized_kprobe *op; - int ret; /* * Unused kprobe MUST be on the way of delayed unoptimizing (means @@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap) /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ - ret = kprobe_optready(ap); - if (ret) - return ret; + if (!kprobe_optready(ap)) + return -EINVAL; optimize_kprobe(ap); return 0; -- GitLab From 510bb96fe5b3480b4b22d815786377e54cb701e7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Apr 2019 10:46:07 +0200 Subject: [PATCH 1135/1861] x86/mm: Prevent bogus warnings with "noexec=off" Xose Vazquez Perez reported boot warnings when NX is disabled on the kernel command line. __early_set_fixmap() triggers this warning: attempted to set unsupported pgprot: 8000000000000163 bits: 8000000000000000 supported: 7fffffffffffffff WARNING: CPU: 0 PID: 0 at arch/x86/include/asm/pgtable.h:537 __early_set_fixmap+0xa2/0xff because it uses __default_kernel_pte_mask to mask out unsupported bits. Use __supported_pte_mask instead. Disabling NX on the command line also triggers the NX warning in the page table mapping check: WARNING: CPU: 1 PID: 1 at arch/x86/mm/dump_pagetables.c:262 note_page+0x2ae/0x650 .... Make the warning depend on NX set in __supported_pte_mask. Reported-by: Xose Vazquez Perez Tested-by: Xose Vazquez Perez Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1904151037530.1729@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 3 ++- arch/x86/mm/ioremap.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ee8f8ab469417..c0309ea9abee4 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st) #endif /* Account the WX pages */ st->wx_pages += npages; - WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n", + WARN_ONCE(__supported_pte_mask & _PAGE_NX, + "x86/mm: Found insecure W+X mapping at address %pS\n", (void *)st->start_address); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0029604af8a41..dd73d5d74393f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx, pte = early_ioremap_pte(addr); /* Sanitize 'prot' against any unsupported bits: */ - pgprot_val(flags) &= __default_kernel_pte_mask; + pgprot_val(flags) &= __supported_pte_mask; if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); -- GitLab From 0082517fa4bce073e7cf542633439f26538a14cc Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 12 Apr 2019 16:01:53 +0800 Subject: [PATCH 1136/1861] x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T Upon reboot, the Acer TravelMate X514-51T laptop appears to complete the shutdown process, but then it hangs in BIOS POST with a black screen. The problem is intermittent - at some points it has appeared related to Secure Boot settings or different kernel builds, but ultimately we have not been able to identify the exact conditions that trigger the issue to come and go. Besides, the EFI mode cannot be disabled in the BIOS of this model. However, after extensive testing, we observe that using the EFI reboot method reliably avoids the issue in all cases. So add a boot time quirk to use EFI reboot on such systems. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=203119 Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux@endlessm.com Link: http://lkml.kernel.org/r/20190412080152.3718-1-jian-hong@endlessm.com [ Fix !CONFIG_EFI build failure, clarify the code and the changelog a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ include/linux/efi.h | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 725624b6c0c05..8fd3cedd9accd 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +/* + * Some machines don't handle the default ACPI reboot method and + * require the EFI reboot method: + */ +static int __init set_efi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) { + reboot_type = BOOT_EFI; + pr_info("%s series board detected. 
Selecting EFI-method for reboot.\n", d->ident); + } + return 0; +} + void __noreturn machine_real_restart(unsigned int type) { local_irq_disable(); @@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, + { /* Handle reboot issue on Acer TravelMate X514-51T */ + .callback = set_efi_reboot, + .ident = "Acer TravelMate X514-51T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"), + }, + }, /* Apple */ { /* Handle problems with rebooting on Apple MacBook5 */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 54357a258b358..6ebc2098cfe17 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1611,7 +1611,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size); -bool efi_runtime_disabled(void); +#ifdef CONFIG_EFI +extern bool efi_runtime_disabled(void); +#else +static inline bool efi_runtime_disabled(void) { return true; } +#endif + extern void efi_call_virt_check_flags(unsigned long flags, const char *call); extern unsigned long efi_call_virt_save_flags(void); -- GitLab From 780e0106d468a2962b16b52fdf42898f2639e0a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Apr 2019 10:03:35 +0200 Subject: [PATCH 1137/1861] x86/mm/tlb: Revert "x86/mm: Align TLB invalidation info" Revert the following commit: 515ab7c41306: ("x86/mm: Align TLB invalidation info") I found out (the hard way) that under some .config options (notably L1_CACHE_SHIFT=7) and compiler combinations this on-stack alignment leads to a 320 byte stack usage, which then triggers a KASAN stack warning elsewhere. Using 320 bytes of stack space for a 40 byte structure is ludicrous and clearly not right. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Acked-by: Nadav Amit Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 515ab7c41306 ("x86/mm: Align TLB invalidation info") Link: http://lkml.kernel.org/r/20190416080335.GM7905@worktop.programming.kicks-ass.net [ Minor changelog edits. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bc4bc7b2f075d..487b8474c01cd 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { int cpu; - struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { + struct flush_tlb_info info = { .mm = mm, .stride_shift = stride_shift, .freed_tables = freed_tables, -- GitLab From 690908104e39d37947f89d76388c876ce4ec5fda Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Apr 2019 15:16:17 +0200 Subject: [PATCH 1138/1861] KVM: nVMX: allow tests to use bad virtual-APIC page address As mentioned in the comment, there are some special cases where we can simply clear the TPR shadow bit from the CPU-based execution controls in the vmcs02. Handle them so that we can remove some XFAILs from vmx.flat. 
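[ The special case described above, condensed from the diff below: with CR8 load/store exiting enabled and APIC-access virtualization disabled, L2 can never observe the TPR shadow, so the control bit is simply cleared: ]

	if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
	    nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
	    !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
				CPU_BASED_TPR_SHADOW);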
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++--------- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/vmx/vmx.h | 2 ++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7ec9bb1dd7231..a22af5a85540d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2873,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) /* * If translation failed, VM entry will fail because * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. - * Failing the vm entry is _not_ what the processor - * does but it's basically the only possibility we - * have. We could still enter the guest if CR8 load - * exits are enabled, CR8 store exits are enabled, and - * virtualize APIC access is disabled; in this case - * the processor would never use the TPR shadow and we - * could simply clear the bit from the execution - * control. But such a configuration is useless, so - * let's keep the code simple. */ if (!is_error_page(page)) { vmx->nested.virtual_apic_page = page; hpa = page_to_phys(vmx->nested.virtual_apic_page); vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); + } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && + nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + /* + * The processor will never use the TPR shadow, simply + * clear the bit from the execution control. Such a + * configuration is useless, but it happens in tests. + * For any other configuration, failing the vm entry is + * _not_ what the processor does but it's basically the + * only possibility we have. + */ + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_TPR_SHADOW); + } else { + printk("bad virtual-APIC page address\n"); + dump_vmcs(); } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ab432a930ae86..7a8f75fc6b7e6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); } -static void dump_vmcs(void) +void dump_vmcs(void) { u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a1e00d0a2482c..f879529906b48 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +void dump_vmcs(void); + #endif /* __KVM_X86_VMX_H */ -- GitLab From 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa Mon Sep 17 00:00:00 2001 From: Josh Collier Date: Mon, 15 Apr 2019 11:34:22 -0700 Subject: [PATCH 1139/1861] IB/rdmavt: Fix frwr memory registration Current implementation was not properly handling frwr memory registrations. This was uncovered by commit 27f26cec761das ("xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR)") in which xprtrdma, which is used for NFS over RDMA, started failing as it was the first ULP to modify the ib_mr iova resulting in the NFS server getting REMOTE ACCESS ERROR when attempting to perform RDMA Writes to the client. The fix is to properly capture the true iova, offset, and length in the call to ib_map_mr_sg, and then update the iova when processing the IB_WR_REG_MEM on the send queue. 
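[ The core of the fix, condensed from the diff below: capture the ULP-visible iova, offset and length after ib_sg_to_pages() finishes, rather than inside rvt_set_page(), so a ULP that adjusts ibmr->iova is honored: ]

	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
	mr->mr.user_base = ibmr->iova;
	mr->mr.iova = ibmr->iova;
	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
	mr->mr.length = (size_t)ibmr->length;
	return ret;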
Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg") Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Josh Collier Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mr.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 7287950434969..0bb6e39dd03a7 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) if (unlikely(mapped_segs == mr->mr.max_segs)) return -ENOMEM; - if (mr->mr.length == 0) { - mr->mr.user_base = addr; - mr->mr.iova = addr; - } - m = mapped_segs / RVT_SEGSZ; n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; @@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. + * * Return: number of sg elements mapped to the memory region */ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rvt_mr *mr = to_imr(ibmr); + int ret; mr->mr.length = 0; mr->mr.page_shift = PAGE_SHIFT; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, - rvt_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); + mr->mr.user_base = ibmr->iova; + mr->mr.iova = ibmr->iova; + mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; + mr->mr.length = (size_t)ibmr->length; + return ret; } /** @@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, ibmr->rkey = key; mr->mr.lkey = key; mr->mr.access_flags = access; + mr->mr.iova = ibmr->iova; atomic_set(&mr->mr.lkey_invalid, 0); return 0; -- GitLab From 52a44f83fc2d64a5e74d5d685fad2fecc7b7a321 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 29 Mar 2019 11:12:12 +0200 Subject: [PATCH 1140/1861] perf/core: Fix the address filtering fix The following recent commit: c60f83b813e5 ("perf, pt, coresight: Fix address filters for vmas with non-zero offset") changes the address filtering logic to communicate filter ranges to the PMU driver via a single address range object, instead of having the driver do the final bit of math. That change forgets to take into account kernel filters, which are not calculated the same way as DSO based filters. Fix that by passing the kernel filters the same way as file-based filters. This doesn't require any additional changes in the drivers. 
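Distilled, the apply loop now branches on whether a filter is file-backed; a sketch with simplified types (not the perf core structures):

  struct filter { void *dentry; unsigned long offset, size; };
  struct range  { unsigned long start, size; };

  static void filter_to_range(const struct filter *f, struct range *r)
  {
          if (f->dentry) {
                  /* File-backed: must be resolved against the task's
                   * mappings (needs mm; zeroed until lookup succeeds). */
                  r->start = 0;
                  r->size  = 0;
          } else {
                  /* Kernel filter: offset and size are already final. */
                  r->start = f->offset;
                  r->size  = f->size;
          }
  }
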
Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: c60f83b813e5 ("perf, pt, coresight: Fix address filters for vmas with non-zero offset") Link: https://lkml.kernel.org/r/20190329091212.29870-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 534e01e7bc368..dc7dead2d2cc2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9077,26 +9077,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (task == TASK_TOMBSTONE) return; - if (!ifh->nr_file_filters) - return; - - mm = get_task_mm(event->ctx->task); - if (!mm) - goto restart; + if (ifh->nr_file_filters) { + mm = get_task_mm(event->ctx->task); + if (!mm) + goto restart; - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filter_ranges[count].start = 0; - event->addr_filter_ranges[count].size = 0; + if (filter->path.dentry) { + /* + * Adjust base offset if the filter is associated to a + * binary that needs to be mapped: + */ + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; - /* - * Adjust base offset if the filter is associated to a binary - * that needs to be mapped: - */ - if (filter->path.dentry) perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); + } else { + event->addr_filter_ranges[count].start = filter->offset; + event->addr_filter_ranges[count].size = filter->size; + } count++; } @@ -9104,9 +9107,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); - up_read(&mm->mmap_sem); + if (ifh->nr_file_filters) { + up_read(&mm->mmap_sem); - mmput(mm); + mmput(mm); + } restart: perf_event_stop(event, 1); -- GitLab From 339bc4183596e1f68c2c98a03b87aa124107c317 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 29 Mar 2019 11:13:38 +0200 Subject: [PATCH 1141/1861] perf/ring_buffer: Fix AUX record suppression The following commit: 1627314fb54a33e ("perf: Suppress AUX/OVERWRITE records") has an unintended side-effect of also suppressing all AUX records with no flags and non-zero size, so all the regular records in the full trace mode. This breaks some use cases for people. Fix this by restoring "regular" AUX records. 
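The resulting emission rule is compact enough to state in isolation (flag values as defined in the perf UAPI):

  #include <stdbool.h>
  #include <stdint.h>

  #define PERF_AUX_FLAG_TRUNCATED 0x01ULL
  #define PERF_AUX_FLAG_OVERWRITE 0x02ULL

  /* Emit RECORD_AUX when there is new data, or any flag beyond the
   * bare OVERWRITE marker; suppress only the empty-OVERWRITE case. */
  static bool emit_record_aux(unsigned long size, uint64_t flags)
  {
          return size || (flags & ~PERF_AUX_FLAG_OVERWRITE);
  }
  /* emit_record_aux(4096, 0)                     -> true  (restored)   */
  /* emit_record_aux(0, PERF_AUX_FLAG_OVERWRITE)  -> false (as before)  */
  /* emit_record_aux(0, PERF_AUX_FLAG_TRUNCATED)  -> true               */
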
Reported-by: Ben Gainey Tested-by: Ben Gainey Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 1627314fb54a33e ("perf: Suppress AUX/OVERWRITE records") Link: https://lkml.kernel.org/r/20190329091338.29999-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 2545ac08cc77b..5eedb49a65ea2 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->aux_head += size; } - if (size || handle->aux_flags) { - /* - * Only send RECORD_AUX if we have something useful to communicate - * - * Note: the OVERWRITE records by themselves are not considered - * useful, as they don't communicate any *new* information, - * aside from the short-lived offset, that becomes history at - * the next event sched-in and therefore isn't useful. - * The userspace that needs to copy out AUX data in overwrite - * mode should know to use user_page::aux_head for the actual - * offset. So, from now on we don't output AUX records that - * have *only* OVERWRITE flag set. - */ - - if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE) - perf_event_aux_event(handle->event, aux_head, size, - handle->aux_flags); - } + /* + * Only send RECORD_AUX if we have something useful to communicate + * + * Note: the OVERWRITE records by themselves are not considered + * useful, as they don't communicate any *new* information, + * aside from the short-lived offset, that becomes history at + * the next event sched-in and therefore isn't useful. + * The userspace that needs to copy out AUX data in overwrite + * mode should know to use user_page::aux_head for the actual + * offset. So, from now on we don't output AUX records that + * have *only* OVERWRITE flag set. + */ + if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)) + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); rb->user_page->aux_head = rb->aux_head; if (rb_need_aux_wakeup(rb)) -- GitLab From 9d5dcc93a6ddfc78124f006ccd3637ce070ef2fc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:44:58 -0700 Subject: [PATCH 1142/1861] perf/x86: Fix incorrect PEBS_REGS PEBS_REGS used as mask for the supported registers for large PEBS. However, the mask cannot filter the sample_regs_user/sample_regs_intr correctly. (1ULL << PERF_REG_X86_*) should be used to replace PERF_REG_X86_*, which is only the index. Rename PEBS_REGS to PEBS_GP_REGS, because the mask is only for general purpose registers. 
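The bug is easy to demonstrate with the first two enum values (PERF_REG_X86_AX is 0 and PERF_REG_X86_BX is 1 in the uapi): OR-ing indexes yields a small integer, not a bitmap, so the filter compared sample_regs_user against the wrong quantity. Illustrative standalone fragment:

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* PERF_REG_X86_AX == 0, PERF_REG_X86_BX == 1 (indexes). */
          uint64_t as_indexes = 0 | 1;                     /* == 1 */
          uint64_t as_bits    = (1ULL << 0) | (1ULL << 1); /* == 3 */

          /* A caller sampling AX and BX sets bits 0 and 1, so only
           * the bitmap form can mask sample_regs_user sensibly. */
          printf("index form %#llx vs bitmap form %#llx\n",
                 (unsigned long long)as_indexes,
                 (unsigned long long)as_bits);
          return 0;
  }
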
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Fixes: 2fe1bc1f501d ("perf/x86: Enable free running PEBS for REGS_USER/INTR") Link: https://lkml.kernel.org/r/20190402194509.2832-2-kan.liang@linux.intel.com [ Renamed it to PEBS_GP_REGS - as 'GPRS' is used elsewhere ;-) ] Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/perf_event.h | 38 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f61dcbef20ffe..f9451566cd9b2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3131,7 +3131,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) flags &= ~PERF_SAMPLE_TIME; if (!event->attr.exclude_kernel) flags &= ~PERF_SAMPLE_REGS_USER; - if (event->attr.sample_regs_user & ~PEBS_REGS) + if (event->attr.sample_regs_user & ~PEBS_GP_REGS) flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a75955741c504..1e98a42b560ad 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -96,25 +96,25 @@ struct amd_nb { PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ PERF_SAMPLE_PERIOD) -#define PEBS_REGS \ - (PERF_REG_X86_AX | \ - PERF_REG_X86_BX | \ - PERF_REG_X86_CX | \ - PERF_REG_X86_DX | \ - PERF_REG_X86_DI | \ - PERF_REG_X86_SI | \ - PERF_REG_X86_SP | \ - PERF_REG_X86_BP | \ - PERF_REG_X86_IP | \ - PERF_REG_X86_FLAGS | \ - PERF_REG_X86_R8 | \ - PERF_REG_X86_R9 | \ - PERF_REG_X86_R10 | \ - PERF_REG_X86_R11 | \ - PERF_REG_X86_R12 | \ - PERF_REG_X86_R13 | \ - PERF_REG_X86_R14 | \ - PERF_REG_X86_R15) +#define PEBS_GP_REGS \ + ((1ULL << PERF_REG_X86_AX) | \ + (1ULL << PERF_REG_X86_BX) | \ + (1ULL << PERF_REG_X86_CX) | \ + (1ULL << PERF_REG_X86_DX) | \ + (1ULL << PERF_REG_X86_DI) | \ + (1ULL << PERF_REG_X86_SI) | \ + (1ULL << PERF_REG_X86_SP) | \ + (1ULL << PERF_REG_X86_BP) | \ + (1ULL << PERF_REG_X86_IP) | \ + (1ULL << PERF_REG_X86_FLAGS) | \ + (1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) /* * Per register state. -- GitLab From c68d224e5ed15605e651e2482c6ffd95915ddf58 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 8 Apr 2019 10:32:51 -0700 Subject: [PATCH 1143/1861] perf/core: Add perf_pmu_resched() as global function This patch add perf_pmu_resched() a global function that can be called to force rescheduling of events for a given PMU. The function locks both cpuctx and task_ctx internally. This will be used by a subsequent patch. Signed-off-by: Stephane Eranian [ Simplified the calling convention. 
] Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Cc: nelson.dsouza@intel.com Cc: tonyj@suse.com Link: https://lkml.kernel.org/r/20190408173252.37932-2-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 +++ kernel/events/core.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 085a95e2582ad..f3864e1c55695 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -888,6 +888,9 @@ extern void perf_sched_cb_dec(struct pmu *pmu); extern void perf_sched_cb_inc(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); + +extern void perf_pmu_resched(struct pmu *pmu); + extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); diff --git a/kernel/events/core.c b/kernel/events/core.c index 30a572e4c6f18..abbd4b3b96c2a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2478,6 +2478,16 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, perf_pmu_enable(cpuctx->ctx.pmu); } +void perf_pmu_resched(struct pmu *pmu) +{ + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + struct perf_event_context *task_ctx = cpuctx->task_ctx; + + perf_ctx_lock(cpuctx, task_ctx); + ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU); + perf_ctx_unlock(cpuctx, task_ctx); +} + /* * Cross CPU call to install and enable a performance event * -- GitLab From f447e4eb3ad1e60d173ca997fcb2ef2a66f12574 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 8 Apr 2019 10:32:52 -0700 Subject: [PATCH 1144/1861] perf/x86/intel: Force resched when TFA sysctl is modified This patch provides guarantee to the sysadmin that when TFA is disabled, no PMU event is using PMC3 when the echo command returns. Vice-Versa, when TFA is enabled, PMU can use PMC3 immediately (to eliminate possible multiplexing). 
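In sketch form (a condensation using the helpers introduced by this series, not the patch verbatim), the store handler fans a check out to every CPU and forces a reschedule wherever PMC3 is live, which is what makes the transition take effect immediately:

  /* Illustrative condensation of the toggle path. */
  static void update_tfa_sched(void *ignored)
  {
          /* PMC3 in use on this CPU? Then reschedule all events. */
          if (test_bit(3, this_cpu_ptr(&cpu_hw_events)->active_mask))
                  perf_pmu_resched(x86_get_pmu());
  }

  /* ...from the sysfs store, after allow_tsx_force_abort flips: */
  get_online_cpus();
  on_each_cpu(update_tfa_sched, NULL, 1);
  put_online_cpus();

The perf stat session below shows the counters adjusting as the sysctl flips: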
$ perf stat -a -I 1000 --no-merge -e branches,branches,branches,branches 1.000123979 125,768,725,208 branches 1.000562520 125,631,000,456 branches 1.000942898 125,487,114,291 branches 1.001333316 125,323,363,620 branches 2.004721306 125,514,968,546 branches 2.005114560 125,511,110,861 branches 2.005482722 125,510,132,724 branches 2.005851245 125,508,967,086 branches 3.006323475 125,166,570,648 branches 3.006709247 125,165,650,056 branches 3.007086605 125,164,639,142 branches 3.007459298 125,164,402,912 branches 4.007922698 125,045,577,140 branches 4.008310775 125,046,804,324 branches 4.008670814 125,048,265,111 branches 4.009039251 125,048,677,611 branches 5.009503373 125,122,240,217 branches 5.009897067 125,122,450,517 branches Then on another connection, sysadmin does: $ echo 1 >/sys/devices/cpu/allow_tsx_force_abort Then perf stat adjusts the events immediately: 5.010286029 125,121,393,483 branches 5.010646308 125,120,556,786 branches 6.011113588 124,963,351,832 branches 6.011510331 124,964,267,566 branches 6.011889913 124,964,829,130 branches 6.012262996 124,965,841,156 branches 7.012708299 124,419,832,234 branches [79.69%] 7.012847908 124,416,363,853 branches [79.73%] 7.013225462 124,400,723,712 branches [79.73%] 7.013598191 124,376,154,434 branches [79.70%] 8.014089834 124,250,862,693 branches [74.98%] 8.014481363 124,267,539,139 branches [74.94%] 8.014856006 124,259,519,786 branches [74.98%] 8.014980848 124,225,457,969 branches [75.04%] 9.015464576 124,204,235,423 branches [75.03%] 9.015858587 124,204,988,490 branches [75.04%] 9.016243680 124,220,092,486 branches [74.99%] 9.016620104 124,231,260,146 branches [74.94%] And vice-versa if the syadmin does: $ echo 0 >/sys/devices/cpu/allow_tsx_force_abort Events are again spread over the 4 counters: 10.017096277 124,276,230,565 branches [74.96%] 10.017237209 124,228,062,171 branches [75.03%] 10.017478637 124,178,780,626 branches [75.03%] 10.017853402 124,198,316,177 branches [75.03%] 11.018334423 124,602,418,933 branches [85.40%] 11.018722584 124,602,921,320 branches [85.42%] 11.019095621 124,603,956,093 branches [85.42%] 11.019467742 124,595,273,783 branches [85.42%] 12.019945736 125,110,114,864 branches 12.020330764 125,109,334,472 branches 12.020688740 125,109,818,865 branches 12.021054020 125,108,594,014 branches 13.021516774 125,109,164,018 branches 13.021903640 125,108,794,510 branches 13.022270770 125,107,756,978 branches 13.022630819 125,109,380,471 branches 14.023114989 125,133,140,817 branches 14.023501880 125,133,785,858 branches 14.023868339 125,133,852,700 branches Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Cc: nelson.dsouza@intel.com Cc: tonyj@suse.com Link: https://lkml.kernel.org/r/20190408173252.37932-3-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 +++ arch/x86/events/intel/core.c | 50 ++++++++++++++++++++++++++++++++++-- arch/x86/events/perf_event.h | 1 + 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 87b50f4be2019..fdd106267fd23 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -661,6 +661,10 @@ static inline int is_x86_event(struct perf_event *event) return event->pmu == &pmu; } +struct pmu *x86_get_pmu(void) +{ + return &pmu; +} /* * Event scheduler state: * diff --git a/arch/x86/events/intel/core.c 
b/arch/x86/events/intel/core.c index 1bb59c4c59f24..8265b5026a19e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4156,6 +4156,50 @@ done: return count; } +static void update_tfa_sched(void *ignored) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * check if PMC3 is used + * and if so force schedule out for all event types all contexts + */ + if (test_bit(3, cpuc->active_mask)) + perf_pmu_resched(x86_get_pmu()); +} + +static ssize_t show_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", allow_tsx_force_abort); +} + +static ssize_t set_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool val; + ssize_t ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + /* no change */ + if (val == allow_tsx_force_abort) + return count; + + allow_tsx_force_abort = val; + + get_online_cpus(); + on_each_cpu(update_tfa_sched, NULL, 1); + put_online_cpus(); + + return count; +} + + static DEVICE_ATTR_RW(freeze_on_smi); static ssize_t branches_show(struct device *cdev, @@ -4188,7 +4232,9 @@ static struct attribute *intel_pmu_caps_attrs[] = { NULL }; -static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); +static DEVICE_ATTR(allow_tsx_force_abort, 0644, + show_sysctl_tfa, + set_sysctl_tfa); static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, @@ -4697,7 +4743,7 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr; + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; } pr_cont("Skylake events, "); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index e544d83ea4b4f..9e474a5f3b869 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -713,6 +713,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \ .event_str_ht = ht, \ } +struct pmu *x86_get_pmu(void); extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) -- GitLab From 878068ea270ea82767ff1d26c91583263c81fba0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:44:59 -0700 Subject: [PATCH 1145/1861] perf/x86: Support outputting XMM registers Starting from Icelake, XMM registers can be collected in PEBS record. But current code only output the pt_regs. Add a new struct x86_perf_regs for both pt_regs and xmm_regs. The xmm_regs will be used later to keep a pointer to PEBS record which has XMM information. XMM registers are 128 bit. To simplify the code, they are handled like two different registers, which means setting two bits in the register bitmap. This also allows only sampling the lower 64bit bits in XMM. The index of XMM registers starts from 32. There are 16 XMM registers. So all reserved space for regs are used. Remove REG_RESERVED. Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86 regs including both GPRs and XMM. Add REG_NOSUPPORT for 32bit to exclude unsupported registers. Previous platforms can not collect XMM information in PEBS record. Adding pebs_no_xmm_regs to indicate the unsupported platforms. The common code still validates the supported registers. However, it cannot check model specific registers, e.g. XMM. 
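For example (illustrative userspace fragment, using the indexes this patch adds to the uapi, where XMM registers occupy two bits each starting at index 32), a request to sample XMM0 and the low half of XMM1 in the interrupt register set would look like:

  #include <linux/perf_event.h>
  #include <string.h>

  static void request_xmm_sampling(struct perf_event_attr *attr)
  {
          memset(attr, 0, sizeof(*attr));
          attr->sample_type      = PERF_SAMPLE_REGS_INTR;
          attr->precise_ip       = 2;            /* XMM needs PEBS */
          attr->sample_regs_intr = (3ULL << 32)  /* XMM0, both halves */
                                 | (1ULL << 34); /* XMM1, low 64 bits */
  }
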
Add extra check in x86_pmu_hw_config() to reject invalid config of regs_user and regs_intr. The regs_user never supports XMM collection. The regs_intr only supports XMM collection when sampling PEBS event on icelake and later platforms. Originally-by: Andi Kleen Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-3-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 15 +++++++++++++++ arch/x86/events/intel/ds.c | 4 +++- arch/x86/events/perf_event.h | 21 ++++++++++++++++++++- arch/x86/include/asm/perf_event.h | 5 +++++ arch/x86/include/uapi/asm/perf_regs.h | 23 ++++++++++++++++++++++- arch/x86/kernel/perf_regs.c | 27 ++++++++++++++++++++------- 6 files changed, 85 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index fdd106267fd23..de1a924a49145 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event) return -EINVAL; } + /* sample_regs_user never support XMM registers */ + if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + return -EINVAL; + /* + * Besides the general purpose registers, XMM registers may + * be collected in PEBS on some platforms, e.g. Icelake + */ + if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { + if (x86_pmu.pebs_no_xmm_regs) + return -EINVAL; + + if (!event->attr.precise_ip) + return -EINVAL; + } + return x86_setup_perfctr(event); } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 10c99ce1feadd..f57e6cb7fd993 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1628,8 +1628,10 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) + if (x86_pmu.version <= 4) { x86_pmu.pebs_no_isolation = 1; + x86_pmu.pebs_no_xmm_regs = 1; + } if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9e474a5f3b869..7abfadb4f2025 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -115,6 +115,24 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) +#define PEBS_XMM_REGS \ + ((1ULL << PERF_REG_X86_XMM0) | \ + (1ULL << PERF_REG_X86_XMM1) | \ + (1ULL << PERF_REG_X86_XMM2) | \ + (1ULL << PERF_REG_X86_XMM3) | \ + (1ULL << PERF_REG_X86_XMM4) | \ + (1ULL << PERF_REG_X86_XMM5) | \ + (1ULL << PERF_REG_X86_XMM6) | \ + (1ULL << PERF_REG_X86_XMM7) | \ + (1ULL << PERF_REG_X86_XMM8) | \ + (1ULL << PERF_REG_X86_XMM9) | \ + (1ULL << PERF_REG_X86_XMM10) | \ + (1ULL << PERF_REG_X86_XMM11) | \ + (1ULL << PERF_REG_X86_XMM12) | \ + (1ULL << PERF_REG_X86_XMM13) | \ + (1ULL << PERF_REG_X86_XMM14) | \ + (1ULL << PERF_REG_X86_XMM15)) + /* * Per register state. 
*/ @@ -612,7 +630,8 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1; + pebs_no_isolation :1, + pebs_no_xmm_regs :1; int pebs_record_size; int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8bdf749022934..d9f5bbe44b3cc 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void); #define PERF_EFLAGS_VM (1UL << 5) struct pt_regs; +struct x86_perf_regs { + struct pt_regs regs; + u64 *xmm_regs; +}; + extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index f3329cabce5c6..ac67bbea10cae 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ b/arch/x86/include/uapi/asm/perf_regs.h @@ -27,8 +27,29 @@ enum perf_event_x86_regs { PERF_REG_X86_R13, PERF_REG_X86_R14, PERF_REG_X86_R15, - + /* These are the limits for the GPRs. */ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, + + /* These all need two bits set because they are 128bit */ + PERF_REG_X86_XMM0 = 32, + PERF_REG_X86_XMM1 = 34, + PERF_REG_X86_XMM2 = 36, + PERF_REG_X86_XMM3 = 38, + PERF_REG_X86_XMM4 = 40, + PERF_REG_X86_XMM5 = 42, + PERF_REG_X86_XMM6 = 44, + PERF_REG_X86_XMM7 = 46, + PERF_REG_X86_XMM8 = 48, + PERF_REG_X86_XMM9 = 50, + PERF_REG_X86_XMM10 = 52, + PERF_REG_X86_XMM11 = 54, + PERF_REG_X86_XMM12 = 56, + PERF_REG_X86_XMM13 = 58, + PERF_REG_X86_XMM14 = 60, + PERF_REG_X86_XMM15 = 62, + + /* These include both GPRs and XMMX registers */ + PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index c06c4c16c6b69..07c30ee174254 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { u64 perf_reg_value(struct pt_regs *regs, int idx) { + struct x86_perf_regs *perf_regs; + + if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) { + perf_regs = container_of(regs, struct x86_perf_regs, regs); + if (!perf_regs->xmm_regs) + return 0; + return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0]; + } + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) return 0; return regs_get_register(regs, pt_regs_offset[idx]); } -#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) - #ifdef CONFIG_X86_32 +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) + int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) + if (!mask || (mask & REG_NOSUPPORT)) return -EINVAL; return 0; @@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || mask & REG_RESERVED) - return -EINVAL; - - if (mask & REG_NOSUPPORT) + if (!mask || (mask & REG_NOSUPPORT)) return -EINVAL; return 0; -- GitLab From 48f38aa4cc5a48bc0fe85c5c4b1ab171fbb539b6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 2 Apr 2019 12:45:00 -0700 Subject: [PATCH 1146/1861] perf/x86/intel: Extract memory code PEBS parser 
for reuse Extract some code related to memory profiling from the PEBS record parser into separate functions. It can be reused by the upcoming adaptive PEBS parser. No functional changes. Rename intel_hsw_weight to intel_get_tsx_weight, and intel_hsw_transaction to intel_get_tsx_transaction. Because the input is not the hsw pebs format anymore. Signed-off-by: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-4-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 63 ++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index f57e6cb7fd993..2f80b7e282ff2 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1125,34 +1125,50 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 0; } -static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs) +static inline u64 intel_get_tsx_weight(u64 tsx_tuning) { - if (pebs->tsx_tuning) { - union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; + if (tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = tsx_tuning }; return tsx.cycles_last_block; } return 0; } -static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs) +static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) { - u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; /* For RTM XABORTs also log the abort code from AX */ - if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) - txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + if ((txn & PERF_TXN_TRANSACTION) && (ax & 1)) + txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; return txn; } +#define PERF_X86_EVENT_PEBS_HSW_PREC \ + (PERF_X86_EVENT_PEBS_ST_HSW | \ + PERF_X86_EVENT_PEBS_LD_HSW | \ + PERF_X86_EVENT_PEBS_NA_HSW) + +static u64 get_data_src(struct perf_event *event, u64 aux) +{ + u64 val = PERF_MEM_NA; + int fl = event->hw.flags; + bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + + if (fl & PERF_X86_EVENT_PEBS_LDLAT) + val = load_latency_data(aux); + else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) + val = precise_datala_hsw(event, aux); + else if (fst) + val = precise_store_data(aux); + return val; +} + static void setup_pebs_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, struct pt_regs *regs) { -#define PERF_X86_EVENT_PEBS_HSW_PREC \ - (PERF_X86_EVENT_PEBS_ST_HSW | \ - PERF_X86_EVENT_PEBS_LD_HSW | \ - PERF_X86_EVENT_PEBS_NA_HSW) /* * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. 
@@ -1160,17 +1176,13 @@ static void setup_pebs_sample_data(struct perf_event *event, struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_skl *pebs = __pebs; u64 sample_type; - int fll, fst, dsrc; - int fl = event->hw.flags; + int fll; if (pebs == NULL) return; sample_type = event->attr.sample_type; - dsrc = sample_type & PERF_SAMPLE_DATA_SRC; - - fll = fl & PERF_X86_EVENT_PEBS_LDLAT; - fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); + fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; perf_sample_data_init(data, 0, event->hw.last_period); @@ -1185,16 +1197,8 @@ static void setup_pebs_sample_data(struct perf_event *event, /* * data.data_src encodes the data source */ - if (dsrc) { - u64 val = PERF_MEM_NA; - if (fll) - val = load_latency_data(pebs->dse); - else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) - val = precise_datala_hsw(event, pebs->dse); - else if (fst) - val = precise_store_data(pebs->dse); - data->data_src.val = val; - } + if (sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = get_data_src(event, pebs->dse); /* * We must however always use iregs for the unwinder to stay sane; the @@ -1281,10 +1285,11 @@ static void setup_pebs_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data->weight = intel_hsw_weight(pebs); + data->weight = intel_get_tsx_weight(pebs->tsx_tuning); if (sample_type & PERF_SAMPLE_TRANSACTION) - data->txn = intel_hsw_transaction(pebs); + data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, + pebs->ax); } /* -- GitLab From 477f00f9617009a9a3a9271885231573b728ca4f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:01 -0700 Subject: [PATCH 1147/1861] perf/x86/intel/ds: Extract code of event update in short period The drain_pebs() could be called twice in a short period for auto-reload event in pmu::read(). The intel_pmu_save_and_restart_reload() should be called to update the event->count. This case should also be handled on Icelake. Extract the code for later reuse. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-5-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2f80b7e282ff2..9ac73860645df 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1491,6 +1491,25 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) __intel_pmu_pebs_event(event, iregs, at, top, 0, n); } +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +{ + struct perf_event *event; + int bit; + + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). There are no + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. 
+ */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } +} + static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1518,19 +1537,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } if (unlikely(base >= top)) { - /* - * The drain_pebs() could be called twice in a short period - * for auto-reload event in pmu::read(). There are no - * overflows have happened in between. - * It needs to call intel_pmu_save_and_restart_reload() to - * update the event->count for this case. - */ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, - size) { - event = cpuc->events[bit]; - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) - intel_pmu_save_and_restart_reload(event, 0); - } + intel_pmu_pebs_event_update_no_drain(cpuc, size); return; } -- GitLab From c01c348ecdc66085e44912c97368809612231520 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Apr 2019 11:51:38 -0400 Subject: [PATCH 1148/1861] USB: core: Fix unterminated string returned by usb_string() Some drivers (such as the vub300 MMC driver) expect usb_string() to return a properly NUL-terminated string, even when an error occurs. (In fact, vub300's probe routine doesn't bother to check the return code from usb_string().) When the driver goes on to use an unterminated string, it leads to kernel errors such as stack-out-of-bounds, as found by the syzkaller USB fuzzer. An out-of-range string index argument is not at all unlikely, given that some devices don't provide string descriptors and therefore list 0 as the value for their string indexes. This patch makes usb_string() return a properly terminated empty string along with the -EINVAL error code when an out-of-range index is encountered. And since a USB string index is a single-byte value, indexes >= 256 are just as invalid as values of 0 or below. Signed-off-by: Alan Stern Reported-by: syzbot+b75b85111c10b8d680f1@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 82239f27c4ccf..e844bb7b5676a 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -820,9 +820,11 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; - if (size <= 0 || !buf || !index) + if (size <= 0 || !buf) return -EINVAL; buf[0] = 0; + if (index <= 0 || index >= 256) + return -EINVAL; tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; -- GitLab From c22497f5838c237e3094a4dfb99d1c5de6353239 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:02 -0700 Subject: [PATCH 1149/1861] perf/x86/intel: Support adaptive PEBS v4 Adaptive PEBS is a new way to report PEBS sampling information. Instead of a fixed size record for all PEBS events it allows to configure the PEBS record to only include the information needed. Events can then opt in to use such an extended record, or stay with a basic record which only contains the IP. The major new feature is to support LBRs in PEBS record. Besides normal LBR, this allows (much faster) large PEBS, while still supporting callstacks through callstack LBR. 
So essentially a lot of profiling can now be done without frequent interrupts, dropping the overhead significantly. The main requirement still is to use a period, and not use frequency mode, because frequency mode requires reevaluating the frequency on each overflow. The floating point state (XMM) is also supported, which allows efficient profiling of FP function arguments. Introduce specific drain function to handle variable length records. Use a new callback to parse the new record format, and also handle the STATUS field now being at a different offset. Add code to set up the configuration register. Since there is only a single register, all events either get the full super set of all events, or only the basic record. Originally-by: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-6-kan.liang@linux.intel.com [ Renamed GPRS => GP. ] Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 7 + arch/x86/events/intel/ds.c | 380 ++++++++++++++++++++++++++++-- arch/x86/events/intel/lbr.c | 22 ++ arch/x86/events/perf_event.h | 11 +- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/perf_event.h | 43 ++++ 6 files changed, 438 insertions(+), 26 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8265b5026a19e..bdc366d709aac 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2145,6 +2145,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event) bits <<= (idx * 4); mask = 0xfULL << (idx * 4); + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) { + bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + } + rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; ctrl_val |= bits; @@ -3510,6 +3515,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) { + cpuc->pebs_record_size = x86_pmu.pebs_record_size; + if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 9ac73860645df..6436452d6342e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -906,17 +906,87 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) if (cpuc->n_pebs == cpuc->n_large_pebs) { threshold = ds->pebs_absolute_maximum - - reserved * x86_pmu.pebs_record_size; + reserved * cpuc->pebs_record_size; } else { - threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; + threshold = ds->pebs_buffer_base + cpuc->pebs_record_size; } ds->pebs_interrupt_threshold = threshold; } +static void adaptive_pebs_record_size_update(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 pebs_data_cfg = cpuc->pebs_data_cfg; + int sz = sizeof(struct pebs_basic); + + if (pebs_data_cfg & PEBS_DATACFG_MEMINFO) + sz += sizeof(struct pebs_meminfo); + if (pebs_data_cfg & PEBS_DATACFG_GP) + sz += sizeof(struct pebs_gprs); + if (pebs_data_cfg & PEBS_DATACFG_XMMS) + sz += sizeof(struct pebs_xmm); + if (pebs_data_cfg & PEBS_DATACFG_LBRS) + sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry); + + cpuc->pebs_record_size = sz; +} + +#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR 
| PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ + PERF_SAMPLE_TRANSACTION) + +static u64 pebs_update_adaptive_cfg(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + u64 sample_type = attr->sample_type; + u64 pebs_data_cfg = 0; + bool gprs, tsx_weight; + + if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) && + attr->precise_ip > 1) + return pebs_data_cfg; + + if (sample_type & PERF_PEBS_MEMINFO_TYPE) + pebs_data_cfg |= PEBS_DATACFG_MEMINFO; + + /* + * We need GPRs when: + * + user requested them + * + precise_ip < 2 for the non event IP + * + For RTM TSX weight we need GPRs for the abort code. + */ + gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS); + + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && + ((attr->config & INTEL_ARCH_EVENT_MASK) == + x86_pmu.rtm_abort_event); + + if (gprs || (attr->precise_ip < 2) || tsx_weight) + pebs_data_cfg |= PEBS_DATACFG_GP; + + if ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_XMM_REGS)) + pebs_data_cfg |= PEBS_DATACFG_XMMS; + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + /* + * For now always log all LBRs. Could configure this + * later. + */ + pebs_data_cfg |= PEBS_DATACFG_LBRS | + ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT); + } + + return pebs_data_cfg; +} + static void -pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, + struct perf_event *event, bool add) { + struct pmu *pmu = event->ctx->pmu; /* * Make sure we get updated with the first PEBS * event. It will trigger also during removal, but @@ -933,6 +1003,29 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu) update = true; } + /* + * The PEBS record doesn't shrink on pmu::del(). Doing so would require + * iterating all remaining PEBS events to reconstruct the config. + */ + if (x86_pmu.intel_cap.pebs_baseline && add) { + u64 pebs_data_cfg; + + /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */ + if (cpuc->n_pebs == 1) { + cpuc->pebs_data_cfg = 0; + cpuc->pebs_record_size = sizeof(struct pebs_basic); + } + + pebs_data_cfg = pebs_update_adaptive_cfg(event); + + /* Update pebs_record_size if new event requires more data. */ + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) { + cpuc->pebs_data_cfg |= pebs_data_cfg; + adaptive_pebs_record_size_update(); + update = true; + } + } + if (update) pebs_update_threshold(cpuc); } @@ -947,7 +1040,7 @@ void intel_pmu_pebs_add(struct perf_event *event) if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); + pebs_update_state(needed_cb, cpuc, event, true); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -965,6 +1058,14 @@ void intel_pmu_pebs_enable(struct perf_event *event) else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled |= 1ULL << 63; + if (x86_pmu.intel_cap.pebs_baseline) { + hwc->config |= ICL_EVENTSEL_ADAPTIVE; + if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) { + wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg); + cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg; + } + } + /* * Use auto-reload if possible to save a MSR write in the PMI. * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. 
@@ -991,7 +1092,7 @@ void intel_pmu_pebs_del(struct perf_event *event) if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; - pebs_update_state(needed_cb, cpuc, event->ctx->pmu); + pebs_update_state(needed_cb, cpuc, event, false); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -1144,6 +1245,13 @@ static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax) return txn; } +static inline u64 get_pebs_status(void *n) +{ + if (x86_pmu.intel_cap.pebs_format < 4) + return ((struct pebs_record_nhm *)n)->status; + return ((struct pebs_basic *)n)->applicable_counters; +} + #define PERF_X86_EVENT_PEBS_HSW_PREC \ (PERF_X86_EVENT_PEBS_ST_HSW | \ PERF_X86_EVENT_PEBS_LD_HSW | \ @@ -1164,7 +1272,7 @@ static u64 get_data_src(struct perf_event *event, u64 aux) return val; } -static void setup_pebs_sample_data(struct perf_event *event, +static void setup_pebs_fixed_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, struct pt_regs *regs) @@ -1306,6 +1414,140 @@ static void setup_pebs_sample_data(struct perf_event *event, data->br_stack = &cpuc->lbr_stack; } +static void adaptive_pebs_save_regs(struct pt_regs *regs, + struct pebs_gprs *gprs) +{ + regs->ax = gprs->ax; + regs->bx = gprs->bx; + regs->cx = gprs->cx; + regs->dx = gprs->dx; + regs->si = gprs->si; + regs->di = gprs->di; + regs->bp = gprs->bp; + regs->sp = gprs->sp; +#ifndef CONFIG_X86_32 + regs->r8 = gprs->r8; + regs->r9 = gprs->r9; + regs->r10 = gprs->r10; + regs->r11 = gprs->r11; + regs->r12 = gprs->r12; + regs->r13 = gprs->r13; + regs->r14 = gprs->r14; + regs->r15 = gprs->r15; +#endif +} + +/* + * With adaptive PEBS the layout depends on what fields are configured. + */ + +static void setup_pebs_adaptive_sample_data(struct perf_event *event, + struct pt_regs *iregs, void *__pebs, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct pebs_basic *basic = __pebs; + void *next_record = basic + 1; + u64 sample_type; + u64 format_size; + struct pebs_meminfo *meminfo = NULL; + struct pebs_gprs *gprs = NULL; + struct x86_perf_regs *perf_regs; + + if (basic == NULL) + return; + + perf_regs = container_of(regs, struct x86_perf_regs, regs); + perf_regs->xmm_regs = NULL; + + sample_type = event->attr.sample_type; + format_size = basic->format_size; + perf_sample_data_init(data, 0, event->hw.last_period); + data->period = event->hw.last_period; + + if (event->attr.use_clockid == 0) + data->time = native_sched_clock_from_tsc(basic->tsc); + + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happened between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) + data->callchain = perf_callchain(event, iregs); + + *regs = *iregs; + /* The ip in basic is EventingIP */ + set_linear_ip(regs, basic->ip); + regs->flags = PERF_EFLAGS_EXACT; + + /* + * The record for MEMINFO is in front of GP + * But PERF_SAMPLE_TRANSACTION needs gprs->ax. + * Save the pointer here but process later. 
+ */ + if (format_size & PEBS_DATACFG_MEMINFO) { + meminfo = next_record; + next_record = meminfo + 1; + } + + if (format_size & PEBS_DATACFG_GP) { + gprs = next_record; + next_record = gprs + 1; + + if (event->attr.precise_ip < 2) { + set_linear_ip(regs, gprs->ip); + regs->flags &= ~PERF_EFLAGS_EXACT; + } + + if (sample_type & PERF_SAMPLE_REGS_INTR) + adaptive_pebs_save_regs(regs, gprs); + } + + if (format_size & PEBS_DATACFG_MEMINFO) { + if (sample_type & PERF_SAMPLE_WEIGHT) + data->weight = meminfo->latency ?: + intel_get_tsx_weight(meminfo->tsx_tuning); + + if (sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = get_data_src(event, meminfo->aux); + + if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) + data->addr = meminfo->address; + + if (sample_type & PERF_SAMPLE_TRANSACTION) + data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, + gprs ? gprs->ax : 0); + } + + if (format_size & PEBS_DATACFG_XMMS) { + struct pebs_xmm *xmm = next_record; + + next_record = xmm + 1; + perf_regs->xmm_regs = xmm->xmm; + } + + if (format_size & PEBS_DATACFG_LBRS) { + struct pebs_lbr *lbr = next_record; + int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) + & 0xff) + 1; + next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry); + + if (has_branch_stack(event)) { + intel_pmu_store_pebs_lbrs(lbr); + data->br_stack = &cpuc->lbr_stack; + } + } + + WARN_ONCE(next_record != __pebs + (format_size >> 48), + "PEBS record size %llu, expected %llu, config %llx\n", + format_size >> 48, + (u64)(next_record - __pebs), + basic->format_size); +} + static inline void * get_next_pebs_record_by_bit(void *base, void *top, int bit) { @@ -1323,19 +1565,19 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) if (base == NULL) return NULL; - for (at = base; at < top; at += x86_pmu.pebs_record_size) { - struct pebs_record_nhm *p = at; + for (at = base; at < top; at += cpuc->pebs_record_size) { + unsigned long status = get_pebs_status(at); - if (test_bit(bit, (unsigned long *)&p->status)) { + if (test_bit(bit, (unsigned long *)&status)) { /* PEBS v3 has accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) return at; - if (p->status == (1 << bit)) + if (status == (1 << bit)) return at; /* clear non-PEBS bit and re-check */ - pebs_status = p->status & cpuc->pebs_enabled; + pebs_status = status & cpuc->pebs_enabled; pebs_status &= PEBS_COUNTER_MASK; if (pebs_status == (1 << bit)) return at; @@ -1415,11 +1657,18 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *base, void *top, - int bit, int count) + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct perf_sample_data data; - struct pt_regs regs; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { @@ -1434,20 +1683,20 @@ static void __intel_pmu_pebs_event(struct perf_event *event, return; while (count > 1) { - setup_pebs_sample_data(event, iregs, at, &data, ®s); - perf_event_output(event, &data, ®s); - at += x86_pmu.pebs_record_size; + setup_sample(event, iregs, at, &data, regs); + perf_event_output(event, &data, regs); + at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, 
bit); count--; } - setup_pebs_sample_data(event, iregs, at, &data, ®s); + setup_sample(event, iregs, at, &data, regs); /* * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, &data, ®s)) { + if (perf_event_overflow(event, &data, regs)) { x86_pmu_stop(event, 0); return; } @@ -1488,7 +1737,8 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n); + __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + setup_pebs_fixed_sample_data); } static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) @@ -1550,8 +1800,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&pebs_status, - size) + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) counts[bit]++; continue; @@ -1590,8 +1839,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * If collision happened, the record will be dropped. */ if (p->status != (1ULL << bit)) { - for_each_set_bit(i, (unsigned long *)&pebs_status, - x86_pmu.max_pebs_events) + for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; } @@ -1599,7 +1847,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) counts[bit]++; } - for (bit = 0; bit < size; bit++) { + for_each_set_bit(bit, (unsigned long *)&mask, size) { if ((counts[bit] == 0) && (error[bit] == 0)) continue; @@ -1620,11 +1868,66 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) if (counts[bit]) { __intel_pmu_pebs_event(event, iregs, base, - top, bit, counts[bit]); + top, bit, counts[bit], + setup_pebs_fixed_sample_data); } } } +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +{ + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event; + void *base, *at, *top; + int bit, size; + u64 mask; + + if (!x86_pmu.pebs_active) + return; + + base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_basic *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + mask = ((1ULL << x86_pmu.max_pebs_events) - 1) | + (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); + size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + + if (unlikely(base >= top)) { + intel_pmu_pebs_event_update_no_drain(cpuc, size); + return; + } + + for (at = base; at < top; at += cpuc->pebs_record_size) { + u64 pebs_status; + + pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; + pebs_status &= mask; + + for_each_set_bit(bit, (unsigned long *)&pebs_status, size) + counts[bit]++; + } + + for_each_set_bit(bit, (unsigned long *)&mask, size) { + if (counts[bit] == 0) + continue; + + event = cpuc->events[bit]; + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; + + __intel_pmu_pebs_event(event, iregs, base, + top, bit, counts[bit], + setup_pebs_adaptive_sample_data); + } +} + /* * BTS, PEBS probe and setup */ @@ -1646,8 +1949,12 @@ void __init intel_ds_init(void) } if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? 
'+' : '-'; + char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; + if (format < 4) + x86_pmu.intel_cap.pebs_baseline = 0; + switch (format) { case 0: pr_cont("PEBS fmt0%c, ", pebs_type); @@ -1683,6 +1990,29 @@ void __init intel_ds_init(void) x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; + case 4: + x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; + x86_pmu.pebs_record_size = sizeof(struct pebs_basic); + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= + PERF_SAMPLE_BRANCH_STACK | + PERF_SAMPLE_TIME; + x86_pmu.flags |= PMU_FL_PEBS_ALL; + pebs_qual = "-baseline"; + } else { + /* Only basic record supported */ + x86_pmu.pebs_no_xmm_regs = 1; + x86_pmu.large_pebs_flags &= + ~(PERF_SAMPLE_ADDR | + PERF_SAMPLE_TIME | + PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_TRANSACTION | + PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_REGS_INTR); + } + pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + break; + default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 580c1b91c4540..07b7175fc3786 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1080,6 +1080,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) } } +void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + cpuc->lbr_stack.nr = x86_pmu.lbr_nr; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + u64 info = lbr->lbr[i].info; + struct perf_branch_entry *e = &cpuc->lbr_entries[i]; + + e->from = lbr->lbr[i].from; + e->to = lbr->lbr[i].to; + e->mispred = !!(info & LBR_INFO_MISPRED); + e->predicted = !(info & LBR_INFO_MISPRED); + e->in_tx = !!(info & LBR_INFO_IN_TX); + e->abort = !!(info & LBR_INFO_ABORT); + e->cycles = info & LBR_INFO_CYCLES; + e->reserved = 0; + } + intel_pmu_lbr_filter(cpuc); +} + /* * Map interface branch filters onto LBR filters */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 7abfadb4f2025..2059c143946f1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -224,6 +224,11 @@ struct cpu_hw_events { int n_pebs; int n_large_pebs; + /* Current super set of events hardware configuration */ + u64 pebs_data_cfg; + u64 active_pebs_data_cfg; + int pebs_record_size; + /* * Intel LBR bits */ @@ -490,6 +495,7 @@ union perf_capabilities { * values > 32bit. 
*/ u64 full_width_write:1; + u64 pebs_baseline:1; }; u64 capabilities; }; @@ -634,11 +640,12 @@ struct x86_pmu { pebs_no_xmm_regs :1; int pebs_record_size; int pebs_buffer_size; + int max_pebs_events; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); - int max_pebs_events; unsigned long large_pebs_flags; + u64 rtm_abort_event; /* * Intel LBR @@ -978,6 +985,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); void intel_pmu_auto_reload_read(struct perf_event *event); +void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr); + void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ca5bc0eacb95f..1378518cf63ff 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -116,6 +116,7 @@ #define LBR_INFO_CYCLES 0xffff #define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index d9f5bbe44b3cc..997a6587d7cf5 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -32,6 +32,8 @@ #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) +#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) +#define ICL_FIXED_0_ADAPTIVE (1ULL << 32) #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) @@ -87,6 +89,12 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define ARCH_PERFMON_EVENTS_COUNT 7 +#define PEBS_DATACFG_MEMINFO BIT_ULL(0) +#define PEBS_DATACFG_GP BIT_ULL(1) +#define PEBS_DATACFG_XMMS BIT_ULL(2) +#define PEBS_DATACFG_LBRS BIT_ULL(3) +#define PEBS_DATACFG_LBR_SHIFT 24 + /* * Intel "Architectural Performance Monitoring" CPUID * detection/enumeration details: @@ -176,6 +184,41 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) +/* + * Adaptive PEBS v4 + */ + +struct pebs_basic { + u64 format_size; + u64 ip; + u64 applicable_counters; + u64 tsc; +}; + +struct pebs_meminfo { + u64 address; + u64 aux; + u64 latency; + u64 tsx_tuning; +}; + +struct pebs_gprs { + u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di; + u64 r8, r9, r10, r11, r12, r13, r14, r15; +}; + +struct pebs_xmm { + u64 xmm[16*2]; /* two entries for each register */ +}; + +struct pebs_lbr_entry { + u64 from, to, info; +}; + +struct pebs_lbr { + struct pebs_lbr_entry lbr[0]; /* Variable length */ +}; + /* * IBS cpuid feature detection */ -- GitLab From d3617b98b04583df222f34992e65712862a77bf1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 2 Apr 2019 12:45:03 -0700 Subject: [PATCH 1150/1861] perf/x86/lbr: Avoid reading the LBRs when adaptive PEBS handles them With adaptive PEBS the CPU can directly supply the LBR information, so we don't need to read it again. But the LBRs still need to be enabled. Add a special count to the cpuc that distinguishes these two cases, and avoid reading the LBRs unnecessarily when PEBS is active. 
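The guard itself reduces to a comparison of the two reference counts (shown out of context; it sits at the top of intel_pmu_lbr_read()):

  /* All LBR users are adaptive-PEBS events: the PEBS records already
   * carry the LBR data, so skip the LBR MSR reads entirely. */
  if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users)
          return;

On a deep LBR stack this avoids dozens of MSR reads per PMI.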
Signed-off-by: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-7-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 13 ++++++++++++- arch/x86/events/perf_event.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 07b7175fc3786..6f814a27416b4 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -488,6 +488,8 @@ void intel_pmu_lbr_add(struct perf_event *event) * be 'new'. Conversely, a new event can get installed through the * context switch path for the first time. */ + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) + cpuc->lbr_pebs_users++; perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); @@ -507,8 +509,11 @@ void intel_pmu_lbr_del(struct perf_event *event) task_ctx->lbr_callstack_users--; } + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) + cpuc->lbr_pebs_users--; cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); + WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); perf_sched_cb_dec(event->ctx->pmu); } @@ -658,7 +663,13 @@ void intel_pmu_lbr_read(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (!cpuc->lbr_users) + /* + * Don't read when all LBRs users are using adaptive PEBS. + * + * This could be smarter and actually check the event, + * but this simple approach seems to work for now. + */ + if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users) return; if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2059c143946f1..dced915821475 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -233,6 +233,7 @@ struct cpu_hw_events { * Intel LBR bits */ int lbr_users; + int lbr_pebs_users; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct er_account *lbr_sel; -- GitLab From 63b79f6ebc464afb730bc45762c820795e276da1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Apr 2019 12:45:04 -0700 Subject: [PATCH 1151/1861] perf/x86: Support constraint ranges Icelake extended the general counters to 8, even when SMT is enabled. However only a (large) subset of the events can be used on all 8 counters. The events that can or cannot be used on all counters are organized in ranges. A lot of scheduler constraints are required to handle all this. To avoid blowing up the tables add event code ranges to the constraint tables, and a new inline function to match them. 
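The new matcher tests a range with a single compare by leaning on unsigned wraparound: if the masked event code is below the range start, the subtraction wraps to a huge value and the <= test fails. A standalone user-space sketch of the idiom (the harness is illustrative; only the comparison mirrors constraint_match() in the patch, which additionally applies c->cmask first):

    #include <stdint.h>
    #include <stdio.h>

    /* true iff start <= code <= start + size, in one compare */
    static int in_range(uint64_t code, uint64_t start, uint64_t size)
    {
            return (code - start) <= size;  /* wraps for code < start */
    }

    int main(void)
    {
            printf("%d\n", in_range(0xd1, 0xd0, 0x16)); /* 1: inside the 0xd0-0xe6 range */
            printf("%d\n", in_range(0xe7, 0xd0, 0x16)); /* 0: above the range */
            printf("%d\n", in_range(0xcf, 0xd0, 0x16)); /* 0: below; subtraction wraps */
            return 0;
    }

The size field is stored as (end - start) by __EVENT_CONSTRAINT_RANGE(), so a plain single-code constraint is simply a range of size 0.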
Originally-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) # developer hat on Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) # maintainer hat on Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-8-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 43 +++++++++++++++++++++++++++++++----- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index bdc366d709aac..d4b52896f173f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2693,7 +2693,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) { + if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6436452d6342e..4429bfa92fbc3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -858,7 +858,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) if (x86_pmu.pebs_constraints) { for_each_event_constraint(c, x86_pmu.pebs_constraints) { - if ((event->hw.config & c->cmask) == c->code) { + if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index dced915821475..0ff0c5ae8c290 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -49,13 +49,19 @@ struct event_constraint { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; u64 idxmsk64; }; - u64 code; - u64 cmask; - int weight; - int overlap; - int flags; + u64 code; + u64 cmask; + int weight; + int overlap; + int flags; + unsigned int size; }; +static inline bool constraint_match(struct event_constraint *c, u64 ecode) +{ + return ((ecode & c->cmask) - c->code) <= (u64)c->size; +} + /* * struct hw_perf_event.flags flags */ @@ -280,18 +286,29 @@ struct cpu_hw_events { void *kfree_on_online[X86_PERF_KFREE_MAX]; }; -#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ +#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ { .idxmsk64 = (n) }, \ .code = (c), \ + .size = (e) - (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ .flags = f, \ } +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \ + __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f) + #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) +/* + * The constraint_match() function only works for 'simple' event codes + * and not for extended (AMD64_EVENTSEL_EVENT) events codes. 
+ */ +#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \ + __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0) + #define INTEL_EXCLEVT_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\ 0, PERF_X86_EVENT_EXCL) @@ -326,6 +343,12 @@ struct cpu_hw_events { #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) +/* + * Constraint on a range of Event codes + */ +#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT) + /* * Constraint on the Event code + UMask + fixed-mask * @@ -373,6 +396,9 @@ struct cpu_hw_events { #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) +#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + /* Check only flags, but allow all event/umask */ #define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) @@ -389,6 +415,11 @@ struct cpu_hw_events { ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \ + __EVENT_CONSTRAINT_RANGE(code, end, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \ __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) -- GitLab
From 6017608936c1825ff5d7325270484042f597edff Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:05 -0700 Subject: [PATCH 1152/1861] perf/x86/intel: Add Icelake support Add Icelake core PMU perf code, including constraint tables and the main enable code. Icelake expanded the generic counters to always 8, even with HT on, but a range of events cannot be scheduled on the extra 4 counters. Add new constraint ranges to describe this to the scheduler. The number of constraints that need to be checked is larger now than with earlier CPUs. At some point we may need a new data structure to look them up more efficiently than with linear search. So far it still seems to be acceptable, however. Icelake added a new fixed counter SLOTS. Full support for it is added later in the patch series. The cache events table is identical to Skylake. Compared to the PEBS instruction event on a generic counter, fixed counter 0 has less skid. Force instruction:ppp to always use fixed counter 0.
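The :ppp modifier is ordinary perf tooling syntax; for instance (illustrative command, hypothetical workload):

    perf record -e instructions:ppp -- ./workload

requests attr.precise_ip == 3, which icl_get_event_constraints() below resolves to the fixed counter 0 constraint.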
Originally-by: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-9-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 111 ++++++++++++++++++++++++++++++ arch/x86/events/intel/ds.c | 25 ++++++- arch/x86/events/perf_event.h | 2 + arch/x86/include/asm/intel_ds.h | 2 +- arch/x86/include/asm/perf_event.h | 2 +- 5 files changed, 138 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d4b52896f173f..156c6ae53d3c6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct event_constraint intel_icl_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), + INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ + INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_icl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + EVENT_EXTRA_END +}; + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -3374,6 +3403,9 @@ static struct event_constraint counter0_constraint = static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); +static struct event_constraint fixed0_constraint = + FIXED_EVENT_CONSTRAINT(0x00c0, 0); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -3392,6 +3424,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct event_constraint * +icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + /* + * Fixed counter 0 has less skid. 
+ * Force instruction:ppp in Fixed counter 0 + */ + if ((event->attr.precise_ip == 3) && + constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_constraint; + + return hsw_get_event_constraints(cpuc, idx, event); +} + static struct event_constraint * glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -4124,6 +4171,42 @@ static struct attribute *hsw_tsx_events_attrs[] = { NULL }; +EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2"); + +static struct attribute *icl_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + NULL, +}; + +static struct attribute *icl_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_abort), + EVENT_PTR(el_commit), + EVENT_PTR(el_capacity_read), + EVENT_PTR(el_capacity_write), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + +static __init struct attribute **get_icl_events_attrs(void) +{ + return boot_cpu_has(X86_FEATURE_RTM) ? + merge_attr(icl_events_attrs, icl_tsx_events_attrs) : + icl_events_attrs; +} + static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4757,6 +4840,34 @@ __init int intel_pmu_init(void) name = "skylake"; break; + case INTEL_FAM6_ICELAKE_MOBILE: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_icl_event_constraints; + x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints; + x86_pmu.extra_regs = intel_icl_extra_regs; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = icl_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
+ hsw_format_attr : nhm_format_attr; + extra_attr = merge_attr(extra_attr, skl_format_attr); + x86_pmu.cpu_events = get_icl_events_attrs(); + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(false); + pr_cont("Icelake events, "); + name = "icelake"; + break; + default: switch (x86_pmu.version) { case 1: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 4429bfa92fbc3..7a9f5dac5abe4 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -849,6 +849,26 @@ struct event_constraint intel_skl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_icl_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */ + + INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -1053,7 +1073,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) cpuc->pebs_enabled |= 1ULL << hwc->idx; - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5)) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled |= 1ULL << 63; @@ -1105,7 +1125,8 @@ void intel_pmu_pebs_disable(struct perf_event *event) cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) + if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && + (x86_pmu.version < 5)) cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 0ff0c5ae8c290..07fc84bb85c1e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -999,6 +999,8 @@ extern struct event_constraint intel_bdw_pebs_event_constraints[]; extern struct event_constraint intel_skl_pebs_event_constraints[]; +extern struct event_constraint intel_icl_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index ae26df1c27896..8380c3ddd4b2e 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -8,7 +8,7 @@ /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS 8 -#define MAX_FIXED_PEBS_EVENTS 3 +#define MAX_FIXED_PEBS_EVENTS 4 /* * A debug store configuration. 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 997a6587d7cf5..04768a3a54548 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -7,7 +7,7 @@ */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 3 +#define INTEL_PMC_MAX_FIXED 4 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 -- GitLab
From f08c47d1f86c6dc666c7e659d94bf6d4492aa9d7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:06 -0700 Subject: [PATCH 1153/1861] perf/x86/intel/cstate: Add Icelake support Icelake uses the same C-state residency events as Sandy Bridge. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-10-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 94a4b7fc75d0e..dd5658ec31d5e 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -578,6 +578,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -- GitLab
From b3377c3acb9e54cf86efcfe25f2e792bca599ed4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:07 -0700 Subject: [PATCH 1154/1861] perf/x86/intel/rapl: Add Icelake support Icelake supports the same RAPL counters as Skylake. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-11-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 94dc564146ca8..37ebf6fc5415b 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -775,6 +775,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), {}, }; -- GitLab
From cf50d79a8cfe5adae37fec026220b009559bbeed Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:08 -0700 Subject: [PATCH 1155/1861] perf/x86/msr: Add Icelake support Icelake is the same as the existing Skylake parts.
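In practice this only whitelists the new model for the SMI and PPERF pseudo-events, which then become usable with the stock tooling, e.g. (illustrative invocation):

    perf stat -e msr/smi/,msr/pperf/ -a -- sleep 1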
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-12-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index a878e6286e4af..f3f4c2263501d 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -89,6 +89,7 @@ static bool test_intel(int idx) case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: + case INTEL_FAM6_ICELAKE_MOBILE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; -- GitLab From 6e394376ee89233508fa21d006546357f8efee31 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Apr 2019 12:45:09 -0700 Subject: [PATCH 1156/1861] perf/x86/intel/uncore: Add Intel Icelake uncore support Add Intel Icelake uncore support: - The init code is based on Skylake - Add new PCI id for IMC - New MSR address for CBOX - Get CBOX# from CNL_UNC_CBO_CONFIG MSR directly - Create a new PMU for fixed clocktick counter Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-13-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 6 ++ arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_snb.c | 91 ++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9fe64c01a2e5a..fc40a1473058e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1367,6 +1367,11 @@ static const struct intel_uncore_init_fun skx_uncore_init __initconst = { .pci_init = skx_uncore_pci_init, }; +static const struct intel_uncore_init_fun icl_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .pci_init = skl_uncore_pci_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1393,6 +1398,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 853a49a8ccf67..79eb2e21e4f04 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -512,6 +512,7 @@ int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); +void icl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 13493f43b2473..f8431819b3e12 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -34,6 +34,8 @@ #define 
PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 #define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca #define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 +#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 +#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -93,6 +95,12 @@ #define SKL_UNC_PERF_GLOBAL_CTL 0xe01 #define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) +/* ICL Cbo register */ +#define ICL_UNC_CBO_CONFIG 0x396 +#define ICL_UNC_NUM_CBO_MASK 0xf +#define ICL_UNC_CBO_0_PER_CTR0 0x702 +#define ICL_UNC_CBO_MSR_OFFSET 0x8 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -280,6 +288,70 @@ void skl_uncore_cpu_init(void) snb_uncore_arb.ops = &skl_uncore_msr_ops; } +static struct intel_uncore_type icl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .perf_ctr_bits = 44, + .perf_ctr = ICL_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = ICL_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + +static struct uncore_event_desc icl_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"), + { /* end: all zeroes */ }, +}; + +static struct attribute *icl_uncore_clock_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group icl_uncore_clock_format_group = { + .name = "format", + .attrs = icl_uncore_clock_formats_attr, +}; + +static struct intel_uncore_type icl_uncore_clockbox = { + .name = "clock", + .num_counters = 1, + .num_boxes = 1, + .fixed_ctr_bits = 48, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_CTL_EV_SEL_MASK, + .format_group = &icl_uncore_clock_format_group, + .ops = &skl_uncore_msr_ops, + .event_descs = icl_uncore_events, +}; + +static struct intel_uncore_type *icl_msr_uncores[] = { + &icl_uncore_cbox, + &snb_uncore_arb, + &icl_uncore_clockbox, + NULL, +}; + +static int icl_get_cbox_num(void) +{ + u64 num_boxes; + + rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes); + + return num_boxes & ICL_UNC_NUM_CBO_MASK; +} + +void icl_uncore_cpu_init(void) +{ + uncore_msr_uncores = icl_msr_uncores; + icl_uncore_cbox.num_boxes = icl_get_cbox_num(); + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -668,6 +740,18 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { { /* end: all zeroes */ }, }; +static const struct pci_device_id icl_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + static struct pci_driver snb_uncore_pci_driver = { .name = "snb_uncore", .id_table = snb_uncore_pci_ids, @@ -693,6 +777,11 @@ static struct pci_driver skl_uncore_pci_driver = { .id_table = skl_uncore_pci_ids, }; +static struct pci_driver icl_uncore_pci_driver = { + .name = "icl_uncore", + .id_table = icl_uncore_pci_ids, +}; + struct imc_uncore_pci_dev { __u32 pci_id; struct pci_driver *driver; @@ -732,6 +821,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ IMC_DEV(CFL_6S_S_IMC, 
&skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ + IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ + IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ { /* end marker */ } }; -- GitLab From 6daeb8737f8a93c6d3a3ae57e23dd3dbe8b239da Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 10 Apr 2019 11:57:09 -0700 Subject: [PATCH 1157/1861] perf/x86/intel: Add Tremont core PMU support Add perf core PMU support for Intel Tremont CPU. The init code is based on Goldmont plus. The generic purpose counter 0 and fixed counter 0 have less skid. Force :ppp events on generic purpose counter 0. Force instruction:ppp on generic purpose counter 0 and fixed counter 0. Updates LLC cache event table and OFFCORE_RESPONSE mask. Adaptive PEBS, which is already enabled on ICL, is also supported on Tremont. No extra code required. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/1554922629-126287-3-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 91 ++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 156c6ae53d3c6..4b4dac089635b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1856,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs }, }; +#define TNT_LOCAL_DRAM BIT_ULL(26) +#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD +#define TNT_DEMAND_WRITE GLM_DEMAND_RFO +#define TNT_LLC_ACCESS GLM_ANY_RESPONSE +#define TNT_SNP_ANY (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \ + SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM) +#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM) + +static __initconst const u64 tnt_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_READ| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_READ| + TNT_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_WRITE| + TNT_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, +}; + +static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + EVENT_EXTRA_END +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -3406,6 +3445,9 @@ static struct event_constraint counter2_constraint = static struct event_constraint fixed0_constraint = FIXED_EVENT_CONSTRAINT(0x00c0, 0); +static struct event_constraint fixed0_counter0_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -3454,6 +3496,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct 
event_constraint * +tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + /* + * :ppp means to do reduced skid PEBS, + * which is available on PMC0 and fixed counter 0. + */ + if (event->attr.precise_ip == 3) { + /* Force instruction:ppp on PMC0 and Fixed counter 0 */ + if (constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_counter0_constraint; + + return &counter0_constraint; + } + + c = intel_get_event_constraints(cpuc, idx, event); + + return c; +} + static bool allow_tsx_force_abort = true; static struct event_constraint * @@ -4585,6 +4650,32 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; + case INTEL_FAM6_ATOM_TREMONT_X: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.extra_regs = intel_tnt_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + extra_attr = slm_format_attr; + pr_cont("Tremont events, "); + name = "Tremont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: -- GitLab From ba696429d290690db967e5f49463df4b2c1314a4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 3 Apr 2019 19:03:09 +0200 Subject: [PATCH 1158/1861] x86/hyper-v: Implement EOI assist Hyper-V TLFS suggests an optimization to avoid imminent VMExit on EOI: "The OS performs an EOI by atomically writing zero to the EOI Assist field of the virtual VP assist page and checking whether the "No EOI required" field was previously zero. If it was, the OS must write to the HV_X64_APIC_EOI MSR thereby triggering an intercept into the hypervisor." Implement the optimization in Linux. Tested-by: Long Li Signed-off-by: Vitaly Kuznetsov Cc: Borislav Petkov Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Michael Kelley (EOSG) Cc: Peter Zijlstra Cc: Sasha Levin Cc: Simon Xiao Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: linux-hyperv@vger.kernel.org Link: http://lkml.kernel.org/r/20190403170309.4107-1-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/hyperv/hv_apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 8eb6fbee8e135..5c056b8aebefc 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -86,6 +86,11 @@ static void hv_apic_write(u32 reg, u32 val) static void hv_apic_eoi_write(u32 reg, u32 val) { + struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()]; + + if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1)) + return; + wrmsr(HV_X64_MSR_EOI, val, 0); } -- GitLab From 08b7c2f9208f0e2a32159e4e7a4831b7adb10a3e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:10:14 +0100 Subject: [PATCH 1159/1861] staging: comedi: vmk80xx: Fix use of uninitialized semaphore If `vmk80xx_auto_attach()` returns an error, the core comedi module code will call `vmk80xx_detach()` to clean up. 
If `vmk80xx_auto_attach()` successfully allocated the comedi device private data, `vmk80xx_detach()` assumes that a `struct semaphore limit_sem` contained in the private data has been initialized and uses it. Unfortunately, there are a couple of places where `vmk80xx_auto_attach()` can return an error after allocating the device private data but before initializing the semaphore, so this assumption is invalid. Fix it by initializing the semaphore just after allocating the private data in `vmk80xx_auto_attach()` before any other errors can be returned. I believe this was the cause of the following syzbot crash report : usb 1-1: config 0 has no interface number 0 usb 1-1: New USB device found, idVendor=10cf, idProduct=8068, bcdDevice=e6.8d usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 usb 1-1: config 0 descriptor?? vmk80xx 1-1:0.117: driver 'vmk80xx' failed to auto-configure device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.1.0-rc4-319354-g9a33b36 #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 assign_lock_key kernel/locking/lockdep.c:786 [inline] register_lock_class+0x11b8/0x1250 kernel/locking/lockdep.c:1095 __lock_acquire+0xfb/0x37c0 kernel/locking/lockdep.c:3582 lock_acquire+0x10d/0x2f0 kernel/locking/lockdep.c:4211 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x44/0x60 kernel/locking/spinlock.c:152 down+0x12/0x80 kernel/locking/semaphore.c:58 vmk80xx_detach+0x59/0x100 drivers/staging/comedi/drivers/vmk80xx.c:829 comedi_device_detach+0xed/0x800 drivers/staging/comedi/drivers.c:204 comedi_device_cleanup.part.0+0x68/0x140 drivers/staging/comedi/comedi_fops.c:156 comedi_device_cleanup drivers/staging/comedi/comedi_fops.c:187 [inline] comedi_free_board_dev.part.0+0x16/0x90 drivers/staging/comedi/comedi_fops.c:190 comedi_free_board_dev drivers/staging/comedi/comedi_fops.c:189 [inline] comedi_release_hardware_device+0x111/0x140 drivers/staging/comedi/comedi_fops.c:2880 comedi_auto_config.cold+0x124/0x1b0 drivers/staging/comedi/drivers.c:1068 usb_probe_interface+0x31d/0x820 drivers/usb/core/driver.c:361 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_set_configuration+0xdf7/0x1740 drivers/usb/core/message.c:2021 generic_probe+0xa2/0xda drivers/usb/core/generic.c:210 usb_probe_device+0xc0/0x150 drivers/usb/core/driver.c:266 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_new_device.cold+0x537/0xccf drivers/usb/core/hub.c:2534 hub_port_connect drivers/usb/core/hub.c:5089 [inline] hub_port_connect_change drivers/usb/core/hub.c:5204 [inline] port_event drivers/usb/core/hub.c:5350 [inline] 
hub_event+0x138e/0x3b00 drivers/usb/core/hub.c:5432 process_one_work+0x90f/0x1580 kernel/workqueue.c:2269 worker_thread+0x9b/0xe20 kernel/workqueue.c:2415 kthread+0x313/0x420 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+54c2f58f15fe6876b6ad@syzkaller.appspotmail.com Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/vmk80xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 6234b649d887c..b035d662390b6 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -800,6 +800,8 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, devpriv->model = board->model; + sema_init(&devpriv->limit_sem, 8); + ret = vmk80xx_find_usb_endpoints(dev); if (ret) return ret; @@ -808,8 +810,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, if (ret) return ret; - sema_init(&devpriv->limit_sem, 8); - usb_set_intfdata(intf, devpriv); if (devpriv->model == VMK8055_MODEL) -- GitLab From 663d294b4768bfd89e529e069bffa544a830b5bf Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:52:30 +0100 Subject: [PATCH 1160/1861] staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf `vmk80xx_alloc_usb_buffers()` is called from `vmk80xx_auto_attach()` to allocate RX and TX buffers for USB transfers. It allocates `devpriv->usb_rx_buf` followed by `devpriv->usb_tx_buf`. If the allocation of `devpriv->usb_tx_buf` fails, it frees `devpriv->usb_rx_buf`, leaving the pointer set dangling, and returns an error. Later, `vmk80xx_detach()` will be called from the core comedi module code to clean up. `vmk80xx_detach()` also frees both `devpriv->usb_rx_buf` and `devpriv->usb_tx_buf`, but `devpriv->usb_rx_buf` may have already been freed, leading to a double-free error. Fix it by removing the call to `kfree(devpriv->usb_rx_buf)` from `vmk80xx_alloc_usb_buffers()`, relying on `vmk80xx_detach()` to free the memory. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/vmk80xx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index b035d662390b6..65dc6c51037e3 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -682,10 +682,8 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } -- GitLab From a943245adc9ae31942af752e879fbbc182166573 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Apr 2019 11:57:51 +0100 Subject: [PATCH 1161/1861] x86/Kconfig: Fix spelling mistake "effectivness" -> "effectiveness" The Kconfig text contains a spelling mistake, fix it. 
Signed-off-by: Colin Ian King Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20190416105751.18899-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c3..01dbb05bc4987 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1499,7 +1499,7 @@ config X86_CPA_STATISTICS depends on DEBUG_FS ---help--- Expose statistics about the Change Page Attribute mechanims, which - helps to determine the effectivness of preserving large and huge + helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. config ARCH_HAS_MEM_ENCRYPT -- GitLab From f4e97f5d4c9ece362b9379d3158cf5e4c02404dc Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 12 Apr 2019 17:53:14 +0800 Subject: [PATCH 1162/1861] staging: erofs: fix unexpected out-of-bound data access Unexpected out-of-bound data will be read in erofs_read_raw_page after commit 07173c3ec276 ("block: enable multipage bvecs") since one iovec could have multiple pages. Let's fix as what Ming's pointed out in the previous email [1]. [1] https://lore.kernel.org/lkml/20190411080953.GE421@ming.t460p/ Suggested-by: Ming Lei Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Fixes: 07173c3ec276 ("block: enable multipage bvecs") Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c index 526e0dbea5b57..81af768e7248e 100644 --- a/drivers/staging/erofs/data.c +++ b/drivers/staging/erofs/data.c @@ -298,7 +298,7 @@ submit_bio_retry: *last_block = current_block; /* shift in advance in case of it followed by too many gaps */ - if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) { + if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { /* err should reassign to 0 after submitting */ err = 0; goto submit_bio_out; -- GitLab From 4d86c9f73c5a9a7c3c0661e922509c2c51801671 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 26 Mar 2019 22:01:27 -0700 Subject: [PATCH 1163/1861] clocksource/drivers/timer-ti-dm: Remove omap_dm_timer_set_load_start Commit 008258d995a6 ("clocksource/drivers/timer-ti-dm: Make omap_dm_timer_set_load_start() static") made omap_dm_time_set_load_start static because its prototype was not defined in a header. Unfortunately, this causes a build warning on multi_v7_defconfig because this function is not used anywhere in this translation unit: drivers/clocksource/timer-ti-dm.c:589:12: error: unused function 'omap_dm_timer_set_load_start' [-Werror,-Wunused-function] In fact, omap_dm_timer_set_load_start hasn't been used anywhere since commit f190be7f39a5 ("staging: tidspbridge: remove driver") and the prototype was removed in commit 592ea6bd1fad ("clocksource: timer-ti-dm: Make unexported functions static"), which is probably where this should have happened. 
Fixes: 592ea6bd1fad ("clocksource: timer-ti-dm: Make unexported functions static") Fixes: 008258d995a6 ("clocksource/drivers/timer-ti-dm: Make omap_dm_timer_set_load_start() static") Signed-off-by: Nathan Chancellor Acked-by: Tony Lindgren Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-ti-dm.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 3352da6ed61f3..ee8ec5a8cb166 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -585,34 +585,6 @@ static int omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload, return 0; } -/* Optimized set_load which removes costly spin wait in timer_start */ -static int omap_dm_timer_set_load_start(struct omap_dm_timer *timer, - int autoreload, unsigned int load) -{ - u32 l; - - if (unlikely(!timer)) - return -EINVAL; - - omap_dm_timer_enable(timer); - - l = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG); - if (autoreload) { - l |= OMAP_TIMER_CTRL_AR; - omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load); - } else { - l &= ~OMAP_TIMER_CTRL_AR; - } - l |= OMAP_TIMER_CTRL_ST; - - __omap_dm_timer_load_start(timer, l, load, timer->posted); - - /* Save the context */ - timer->context.tclr = l; - timer->context.tldr = load; - timer->context.tcrr = load; - return 0; -} static int omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable, unsigned int match) { -- GitLab From ace965696da2611af759f0284e26342b7b6cec89 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 1 Apr 2019 13:25:10 +0200 Subject: [PATCH 1164/1861] serial: sh-sci: Fix HSCIF RX sampling point calculation There are several issues with the formula used for calculating the deviation from the intended rate: 1. While min_err and last_stop are signed, srr and baud are unsigned. Hence the signed values are promoted to unsigned, which will lead to a bogus value of deviation if min_err is negative, 2. Srr is the register field value, which is one less than the actual sampling rate factor, 3. The divisions do not use rounding. Fix this by casting unsigned variables to int, adding one to srr, and using a single DIV_ROUND_CLOSEST(). Fixes: 63ba1e00f178a448 ("serial: sh-sci: Support for HSCIF RX sampling point adjustment") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Cc: stable Reviewed-by: Ulrich Hecht Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 2d1c626312cd8..55ef6e577f460 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2512,7 +2512,9 @@ done: * center of the last stop bit in sampling clocks. */ int last_stop = bits * 2 - 1; - int deviation = min_err * srr * last_stop / 2 / baud; + int deviation = DIV_ROUND_CLOSEST(min_err * last_stop * + (int)(srr + 1), + 2 * (int)baud); if (abs(deviation) >= 2) { /* At least two sampling clocks off at the -- GitLab From 6b87784b53592a90d21576be8eff688b56d93cce Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Mar 2019 10:10:26 +0100 Subject: [PATCH 1165/1861] serial: sh-sci: Fix HSCIF RX sampling point adjustment The calculation of the sampling point has min() and max() exchanged. Fix this by using the clamp() helper instead. 
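The swap makes the original expression constant: max(7, deviation / 2) can never be below 7, so min(-8, ...) always yields -8. A side-by-side sketch (kernel context; the wrapper functions are illustrative, min()/max()/clamp() are the usual helpers from <linux/kernel.h>):

    /* broken: the inner max() floors the value at 7, so the outer
     * min() picks -8 regardless of the deviation */
    static int shift_old(int deviation)
    {
            return min(-8, max(7, deviation / 2));
    }

    /* fixed: clamp(val, lo, hi) == max(lo, min(val, hi)), i.e. the
     * result is bounded to the intended [-8, 7] register range */
    static int shift_new(int deviation)
    {
            return clamp(deviation / 2, -8, 7);
    }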
Fixes: 63ba1e00f178a448 ("serial: sh-sci: Support for HSCIF RX sampling point adjustment") Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulrich Hecht Reviewed-by: Wolfram Sang Acked-by: Dirk Behme Cc: stable Reviewed-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 55ef6e577f460..3cd139752d3f7 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2521,7 +2521,7 @@ done: * last stop bit; we can increase the error * margin by shifting the sampling point. */ - int shift = min(-8, max(7, deviation / 2)); + int shift = clamp(deviation / 2, -8, 7); hssrr |= (shift << HSCIF_SRHP_SHIFT) & HSCIF_SRHP_MASK; -- GitLab From e00164a0f000de893944981f41a568c981aca658 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 9 Apr 2019 16:16:38 +0800 Subject: [PATCH 1166/1861] sc16is7xx: move label 'err_spi' to correct section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit err_spi is used when SERIAL_SC16IS7XX_SPI is enabled, so make the label only available under SERIAL_SC16IS7XX_SPI option. Otherwise, the below warning appears. drivers/tty/serial/sc16is7xx.c:1523:1: warning: label ‘err_spi’ defined but not used [-Wunused-label] err_spi: ^~~~~~~ Signed-off-by: Guoqing Jiang Fixes: ac0cdb3d9901 ("sc16is7xx: missing unregister/delete driver on error in sc16is7xx_init()") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 09a183dfc5264..22381a8c72e43 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1520,10 +1520,12 @@ static int __init sc16is7xx_init(void) #endif return ret; +#ifdef CONFIG_SERIAL_SC16IS7XX_SPI err_spi: #ifdef CONFIG_SERIAL_SC16IS7XX_I2C i2c_del_driver(&sc16is7xx_i2c_uart_driver); #endif +#endif err_i2c: uart_unregister_driver(&sc16is7xx_uart); return ret; -- GitLab From 2b27924bb1d48e3775f432b70bdad5e6dd4e7798 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Apr 2019 15:57:19 +0200 Subject: [PATCH 1167/1861] KVM: nVMX: always use early vmcs check when EPT is disabled The remaining failures of vmx.flat when EPT is disabled are caused by incorrectly reflecting VMfails to the L1 hypervisor. What happens is that nested_vmx_restore_host_state corrupts the guest CR3, reloading it with the host's shadow CR3 instead, because it blindly loads GUEST_CR3 from the vmcs01. For simplicity let's just always use hardware VMCS checks when EPT is disabled. This way, nested_vmx_restore_host_state is not reached at all (or at least shouldn't be reached). 
Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx/nested.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd3982..d213ec5c3766d 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,6 +146,7 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 +#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a22af5a85540d..6401eb7ef19ce 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3796,8 +3796,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); nested_ept_uninit_mmu_context(vcpu); - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + /* + * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 + * points to shadow pages! Fortunately we only get here after a WARN_ON + * if EPT is disabled, so a VMabort is perfectly fine. + */ + if (enable_ept) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + } else { + nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); + } /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -5745,6 +5755,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; + /* + * Without EPT it is not possible to restore L1's CR3 and PDPTR on + * VMfail, because they are not available in vmcs01. Just always + * use hardware checks. + */ + if (!enable_ept) + nested_early_check = 1; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) { -- GitLab From bc8a3d8925a8fa09fa550e0da115d95851ce33c6 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 8 Apr 2019 11:07:30 -0700 Subject: [PATCH 1168/1861] kvm: mmu: Fix overflow on kvm mmu page limit calculation KVM bases its memory usage limits on the total number of guest pages across all memslots. However, those limits, and the calculations to produce them, use 32 bit unsigned integers. This can result in overflow if a VM has more guest pages that can be represented by a u32. As a result of this overflow, KVM can use a low limit on the number of MMU pages it will allocate. This makes KVM unable to map all of guest memory at once, prompting spurious faults. Tested: Ran all kvm-unit-tests on an Intel Haswell machine. This patch introduced no new failures. 
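The wrap is easy to reproduce in isolation (standalone user-space sketch; the guest size is hypothetical, and 20/1000 is KVM_PERMILLE_MMU_PAGES from the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t guest_pages = 5ULL << 30;         /* 20 TiB guest, 4 KiB pages */
            uint32_t pages32 = (uint32_t)guest_pages;  /* truncates to 1G pages */
            uint32_t limit32 = pages32 * 20 / 1000;    /* multiply wraps: 0 */
            uint64_t limit64 = guest_pages * 20 / 1000;

            printf("u32 limit: %u, u64 limit: %llu\n",
                   limit32, (unsigned long long)limit64);
            return 0;
    }

With 32-bit arithmetic the computed page limit collapses (here to 0, later clamped up to only KVM_MIN_ALLOC_MMU_PAGES), which is what produced the spurious faults described above.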
Signed-off-by: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 12 ++++++------ arch/x86/kvm/mmu.c | 13 ++++++------- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 159b5988292f3..9b7b731a00321 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) } #define KVM_PERMILLE_MMU_PAGES 20 -#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 @@ -844,9 +844,9 @@ enum kvm_irqchip_mode { }; struct kvm_arch { - unsigned int n_used_mmu_pages; - unsigned int n_requested_mmu_pages; - unsigned int n_max_mmu_pages; + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* @@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); bool pdptrs_changed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 85f7537289534..e10962dfc2032 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); @@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock */ -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { LIST_HEAD(invalid_list); @@ -6031,10 +6031,10 @@ out: /* * Calculate mmu pages needed for kvm. 
*/ -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) { - unsigned int nr_mmu_pages; - unsigned int nr_pages = 0; + unsigned long nr_mmu_pages; + unsigned long nr_pages = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int i; @@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) } nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; - nr_mmu_pages = max(nr_mmu_pages, - (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); + nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES); return nr_mmu_pages; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index bbdc60f2fae89..54c2a377795be 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len); -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) +static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) return kvm->arch.n_max_mmu_pages - diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 099b851dabafd..455f156f56ede 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4270,7 +4270,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, - u32 kvm_nr_mmu_pages) + unsigned long kvm_nr_mmu_pages) { if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; @@ -4284,7 +4284,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return 0; } -static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) +static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) { return kvm->arch.n_max_mmu_pages; } -- GitLab
From 4a58038b9e420276157785afa0a0bbb4b9bc2265 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 20 Mar 2019 08:12:28 +0000 Subject: [PATCH 1169/1861] Revert "svm: Fix AVIC incomplete IPI emulation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bb218fbcfaaa3b115d4cd7a43c0ca164f3a96e57. As Oren Twaig pointed out in the old discussion (https://patchwork.kernel.org/patch/8292231/), the change could potentially cause an extra IPI to be sent to the destination vcpu, because the AVIC hardware already set the IRR bit before the incomplete-IPI #VMEXIT with id=1 (target vcpu is not running). Since writing to ICR and ICR2 will also set the IRR, if something triggers the destination vcpu to get scheduled before the emulation finishes, this could result in an additional IPI. Also, the issue mentioned in commit bb218fbcfaaa was misdiagnosed.
Cc: Radim Krčmář Cc: Paolo Bonzini Reported-by: Oren Twaig Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e0a791c3d4fcc..d7b14c9020526 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4517,14 +4517,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { + int i; + struct kvm_vcpu *vcpu; + struct kvm *kvm = svm->vcpu.kvm; struct kvm_lapic *apic = svm->vcpu.arch.apic; /* - * Update ICR high and low, then emulate sending IPI, - * which is handled when writing APIC_ICR. + * At this point, we expect that the AVIC HW has already + * set the appropriate IRR bits on the valid target + * vcpus. So, we just need to kick the appropriate vcpu. */ - kvm_lapic_reg_write(apic, APIC_ICR2, icrh); - kvm_lapic_reg_write(apic, APIC_ICR, icrl); + kvm_for_each_vcpu(i, vcpu, kvm) { + bool m = kvm_apic_match_dest(vcpu, apic, + icrl & KVM_APIC_SHORT_MASK, + GET_APIC_DEST_FIELD(icrh), + icrl & KVM_APIC_DEST_MASK); + + if (m && !avic_vcpu_is_running(vcpu)) + kvm_vcpu_wake_up(vcpu); + } break; } case AVIC_IPI_FAILURE_INVALID_TARGET: -- GitLab From e44e3eacccfd2294a1ce279f68452b1635d7fa82 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 26 Mar 2019 03:57:37 +0000 Subject: [PATCH 1170/1861] svm/avic: Fix invalidate logical APIC id entry Only clear the valid bit when invalidating a logical APIC id entry. The current logic clears the valid bit, but also sets the rest of the bits (including reserved bits) to 1. Fixes: 98d90582be2e ('svm: Fix AVIC DFR and LDR handling') Signed-off-by: Suravee Suthikulpanit Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d7b14c9020526..933f19d840fe2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -262,6 +262,7 @@ struct amd_svm_iommu_ir { }; #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) @@ -4607,7 +4608,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); if (entry) - WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK); + clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry); } static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) -- GitLab From 99c221796a810055974b54c02e8f53297e48d146 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 3 Apr 2019 16:06:42 +0200 Subject: [PATCH 1171/1861] KVM: x86: svm: make sure NMI is injected after nmi_singlestep I noticed that the apic test from kvm-unit-tests always hangs on my EPYC 7401P: the hanging test nmi-after-sti is trying to deliver 30000 NMIs, and tracing shows that we're sometimes able to deliver a few but never all. When we're trying to inject an NMI we may fail to do so immediately for various reasons; however, we still need to inject it, so enable_nmi_window() arms nmi_singlestep mode. #DB occurs as expected, but we're not checking for pending NMIs before entering the guest, and unless there's a different event to process, the NMI will never get delivered.
Make a KVM_REQ_EVENT request on the vCPU from db_interception() to make sure pending NMIs are checked and possibly injected. Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 933f19d840fe2..c6815aef2cac2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2693,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm) static int db_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + struct kvm_vcpu *vcpu = &svm->vcpu; if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && @@ -2703,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm) if (svm->nmi_singlestep) { disable_nmi_singlestep(svm); + /* Make sure we check for pending NMIs upon entry */ + kvm_make_request(KVM_REQ_EVENT, vcpu); } if (svm->vcpu.guest_debug & -- GitLab From 1811d979c71621aafc7b879477202d286f7e863b Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Fri, 12 Apr 2019 15:55:39 +0800 Subject: [PATCH 1172/1861] x86/kvm: move kvm_load/put_guest_xcr0 into atomic context Guest xcr0 could leak into the host when an MCE happens in guest mode, because do_machine_check() could schedule out at a few places. For example: kvm_load_guest_xcr0 ... kvm_x86_ops->run(vcpu) { vmx_vcpu_run vmx_complete_atomic_exit kvm_machine_check do_machine_check do_memory_failure memory_failure lock_page In this case, host_xcr0 is 0x2ff, guest vcpu xcr0 is 0xff. After the schedule out, the host cpu has the guest xcr0 loaded (0xff). In __switch_to { switch_fpu_finish copy_kernel_to_fpregs XRSTORS If, for any bit i, XSTATE_BV[i] == 1 and xcr0[i] == 0, XRSTORS will generate #GP (in this case, bit 9). Then ex_handler_fprestore kicks in and tries to reinitialize the fpu by restoring the init fpu state. Same story as the last #GP, except we get a DOUBLE FAULT this time.
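The fix below closes this window by moving the xcr0 swap inside the region where the CPU cannot schedule. A condensed sketch of the resulting ordering on the SVM side (function names as in the patch; the VMRUN details and error handling are elided):

	clgi();				/* GIF off: no interrupts, no scheduling */
	kvm_load_guest_xcr0(vcpu);	/* guest xcr0 is live only inside this window */
	/* ... VMRUN and the atomic parts of exit handling ... */
	kvm_put_guest_xcr0(vcpu);	/* host xcr0 restored before events can fire */
	stgi();				/* pending NMIs/MCEs are processed only now */

On the VMX side the same pairing is placed around the guest-entry section of vmx_vcpu_run(), as the diff shows.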
Cc: stable@vger.kernel.org Signed-off-by: WANG Chao Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 10 ++++------ arch/x86/kvm/x86.h | 2 ++ 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c6815aef2cac2..675cecb3fa9c2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5636,6 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); + kvm_load_guest_xcr0(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -5781,6 +5782,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + kvm_put_guest_xcr0(vcpu); stgi(); /* Any pending NMI will happen here */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7a8f75fc6b7e6..88060a621db27 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); + kvm_load_guest_xcr0(vcpu); + if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && vcpu->arch.pkru != vmx->host_pkru) @@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } + kvm_put_guest_xcr0(vcpu); + vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 455f156f56ede..f05891b8df7c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) { if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && !vcpu->guest_xcr0_loaded) { @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 1; } } +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { if (vcpu->guest_xcr0_loaded) { if (vcpu->arch.xcr0 != host_xcr0) @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 0; } } +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } - kvm_load_guest_xcr0(vcpu); - if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_x86_ops->request_immediate_exit(vcpu); @@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_put_guest_xcr0(vcpu); - kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); kvm_after_interrupt(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 28406aa1136d7..aedc5d0d4989b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) __this_cpu_write(current_vcpu, NULL); } +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); #endif -- GitLab From 672ff6cff80ca43bf3258410d2b887036969df5f Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 25 Mar 2019 21:10:17 +0200 Subject: [PATCH 1173/1861] KVM: x86: Raise #GP when guest vCPU do not support PMU 
Before this change, reading a VMware pseudo PMC would succeed even when a PMU is not supported by the guest. This can easily be seen by running the kvm-unit-test vmware_backdoors with the "-cpu host,-pmu" option. Reviewed-by: Mihai Carabas Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 58ead7db71a31..e39741997893a 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; u64 ctr_val; + if (!pmu->version) + return 1; + if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); -- GitLab From e51bfdb68725dc052d16241ace40ea3140f938aa Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 25 Mar 2019 21:09:17 +0200 Subject: [PATCH 1174/1861] KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU The issue was discovered when running kvm-unit-tests on KVM running as L1 on top of Hyper-V. When the vmx_instruction_intercept unit-test attempts to run RDPMC to test RDPMC-exiting, it is intercepted by L1 KVM, whose EXIT_REASON_RDPMC handler raises #GP because the vCPU exposed by Hyper-V doesn't support a PMU, instead of the RDPMC being reflected to L2 with EXIT_REASON_RDPMC as the unit-test expects. The reason the vmx_instruction_intercept unit-test attempts to run RDPMC even though Hyper-V doesn't support a PMU is that L1 exposes RDPMC-exiting support to L2, and it is reasonable to assume that this is supported only when the CPU supports a PMU to begin with. The above issue can easily be simulated by modifying the vmx_instruction_intercept config in x86/unittests.cfg to run QEMU with "-cpu host,+vmx,-pmu" and running the unit-test. To handle the issue, change KVM to expose RDPMC-exiting only when the guest supports a PMU.
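Whether a guest has a PMU is derived from CPUID leaf 0xA, whose EAX bits 7:0 carry the architectural PMU version (0 means no PMU). A minimal user-space sketch of the same check, assuming an x86 target built with GCC or Clang (KVM itself reads the guest's CPUID table instead; see guest_cpuid_has_pmu() in the diff below):

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

/* Returns true if CPUID.0xA reports a non-zero PMU version id. */
static bool cpu_reports_pmu(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
		return false;
	return (eax & 0xff) > 0;	/* EAX[7:0] = PMU version id */
}

int main(void)
{
	printf("PMU reported: %s\n", cpu_reports_pmu() ? "yes" : "no");
	return 0;
}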
Reported-by: Saar Amar Reviewed-by: Mihai Carabas Reviewed-by: Jim Mattson Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 88060a621db27..5866e9e9f1e03 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6856,6 +6856,30 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) } } +static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *entry; + union cpuid10_eax eax; + + entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); + if (!entry) + return false; + + eax.full = entry->eax; + return (eax.split.version_id > 0); +} + +static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool pmu_enabled = guest_cpuid_has_pmu(vcpu); + + if (pmu_enabled) + vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING; + else + vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING; +} + static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6944,6 +6968,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); nested_vmx_entry_exit_ctls_update(vcpu); + nested_vmx_procbased_ctls_update(vcpu); } if (boot_cpu_has(X86_FEATURE_INTEL_PT) && -- GitLab From ed19321fb6571214f410b30322e4ad6e6b7c3915 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:03:09 -0700 Subject: [PATCH 1175/1861] KVM: x86: Load SMRAM in a single shot when leaving SMM RSM emulation is currently broken on VMX when the interrupted guest has CR4.VMXE=1. Rather than dance around the issue of HF_SMM_MASK being set when loading SMSTATE into architectural state, ideally RSM emulation itself would be reworked to clear HF_SMM_MASK prior to loading non-SMM architectural state. Ostensibly, the only motivation for having HF_SMM_MASK set throughout the loading of state from the SMRAM save state area is so that the memory accesses from GET_SMSTATE() are tagged with role.smm. Load all of the SMRAM save state area from guest memory at the beginning of RSM emulation, and load state from the buffer instead of reading guest memory one-by-one. This paves the way for clearing HF_SMM_MASK prior to loading state, and also aligns RSM with the enter_smm() behavior, which fills a buffer and writes SMRAM save state in a single go. 
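The resulting access pattern can be sketched as follows (illustrative only: smram_u64() is a hypothetical helper, while the 512-byte area, the 0x7e00 base, and the 0x7f58 CR0 offset come from the patch below):

#include <stdint.h>
#include <string.h>

/* buf holds the 512-byte save state area, i.e. SMRAM offsets
 * 0x7e00..0x7fff, fetched from guest memory with one bulk read. */
static uint64_t smram_u64(const char *buf, unsigned int offset)
{
	uint64_t val;

	memcpy(&val, buf + (offset - 0x7e00), sizeof(val));
	return val;
}

/* Every subsequent field load is then a pure buffer access, e.g. CR0
 * from the 64-bit save state map: */
static uint64_t smram_cr0(const char *buf)
{
	return smram_u64(buf, 0x7f58);
}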
Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 3 +- arch/x86/include/asm/kvm_host.h | 5 +- arch/x86/kvm/emulate.c | 149 ++++++++++++++--------------- arch/x86/kvm/svm.c | 20 ++-- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/x86.c | 5 +- 6 files changed, 92 insertions(+), 92 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 93c4bf598fb06..ec489c4328509 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -226,7 +226,8 @@ struct x86_emulate_ops { unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase); + int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, + const char *smstate); }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9b7b731a00321..a9d03af340307 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1182,7 +1182,7 @@ struct kvm_x86_ops { int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); + int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); int (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); @@ -1592,4 +1592,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +#define GET_SMSTATE(type, buf, offset) \ + (*(type *)((buf) + (offset) - 0x7e00)) + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c338984c850d2..ae0d289b50fef 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2339,16 +2339,6 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) return edx & bit(X86_FEATURE_LM); } -#define GET_SMSTATE(type, smbase, offset) \ - ({ \ - type __val; \ - int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ - sizeof(__val)); \ - if (r != X86EMUL_CONTINUE) \ - return X86EMUL_UNHANDLEABLE; \ - __val; \ - }) - static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) { desc->g = (flags >> 23) & 1; @@ -2361,27 +2351,29 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) desc->type = (flags >> 8) & 15; } -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) { struct desc_struct desc; int offset; u16 selector; - selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4); + selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); if (n < 3) offset = 0x7f84 + n * 12; else offset = 0x7f2c + (n - 3) * 12; - set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); return X86EMUL_CONTINUE; } -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) { struct desc_struct desc; int offset; @@ -2390,11 
+2382,11 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) offset = 0x7e00 + n * 16; - selector = GET_SMSTATE(u16, smbase, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); - base3 = GET_SMSTATE(u32, smbase, offset + 12); + selector = GET_SMSTATE(u16, smstate, offset); + rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + base3 = GET_SMSTATE(u32, smstate, offset + 12); ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); return X86EMUL_CONTINUE; @@ -2445,7 +2437,8 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + const char *smstate) { struct desc_struct desc; struct desc_ptr dt; @@ -2453,53 +2446,54 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) u32 val, cr0, cr3, cr4; int i; - cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); + cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); + cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4); + *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); - val = GET_SMSTATE(u32, smbase, 0x7fcc); + val = GET_SMSTATE(u32, smstate, 0x7fcc); ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smbase, 0x7fc8); + val = GET_SMSTATE(u32, smstate, 0x7fc8); ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - selector = GET_SMSTATE(u32, smbase, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c)); + selector = GET_SMSTATE(u32, smstate, 0x7fc4); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); - selector = GET_SMSTATE(u32, smbase, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78)); + selector = GET_SMSTATE(u32, smstate, 0x7fc0); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); - dt.address = GET_SMSTATE(u32, smbase, 0x7f74); - dt.size = GET_SMSTATE(u32, smbase, 0x7f70); + dt.address = GET_SMSTATE(u32, smstate, 0x7f74); + dt.size = GET_SMSTATE(u32, smstate, 0x7f70); ctxt->ops->set_gdt(ctxt, &dt); - dt.address = GET_SMSTATE(u32, smbase, 0x7f58); - dt.size = GET_SMSTATE(u32, smbase, 0x7f54); + dt.address = GET_SMSTATE(u32, smstate, 0x7f58); + dt.size = GET_SMSTATE(u32, smstate, 
0x7f54); ctxt->ops->set_idt(ctxt, &dt); for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smbase, i); + int r = rsm_load_seg_32(ctxt, smstate, i); if (r != X86EMUL_CONTINUE) return r; } - cr4 = GET_SMSTATE(u32, smbase, 0x7f14); + cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, + const char *smstate) { struct desc_struct desc; struct desc_ptr dt; @@ -2509,43 +2503,43 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) int i, r; for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8); + *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); - ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - val = GET_SMSTATE(u32, smbase, 0x7f68); + val = GET_SMSTATE(u32, smstate, 0x7f68); ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smbase, 0x7f60); + val = GET_SMSTATE(u32, smstate, 0x7f60); ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - cr3 = GET_SMSTATE(u64, smbase, 0x7f50); - cr4 = GET_SMSTATE(u64, smbase, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); - val = GET_SMSTATE(u64, smbase, 0x7ed0); + cr0 = GET_SMSTATE(u64, smstate, 0x7f58); + cr3 = GET_SMSTATE(u64, smstate, 0x7f50); + cr4 = GET_SMSTATE(u64, smstate, 0x7f48); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); + val = GET_SMSTATE(u64, smstate, 0x7ed0); ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); - selector = GET_SMSTATE(u32, smbase, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98)); - base3 = GET_SMSTATE(u32, smbase, 0x7e9c); + selector = GET_SMSTATE(u32, smstate, 0x7e90); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); + base3 = GET_SMSTATE(u32, smstate, 0x7e9c); ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); - dt.size = GET_SMSTATE(u32, smbase, 0x7e84); - dt.address = GET_SMSTATE(u64, smbase, 0x7e88); + dt.size = GET_SMSTATE(u32, smstate, 0x7e84); + dt.address = GET_SMSTATE(u64, smstate, 0x7e88); ctxt->ops->set_idt(ctxt, &dt); - selector = GET_SMSTATE(u32, smbase, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78)); - base3 = GET_SMSTATE(u32, smbase, 0x7e7c); + selector = GET_SMSTATE(u32, smstate, 0x7e70); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); + base3 = GET_SMSTATE(u32, smstate, 0x7e7c); ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); - dt.size = GET_SMSTATE(u32, smbase, 0x7e64); - dt.address = GET_SMSTATE(u64, 
smbase, 0x7e68); + dt.size = GET_SMSTATE(u32, smstate, 0x7e64); + dt.address = GET_SMSTATE(u64, smstate, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); @@ -2553,7 +2547,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) return r; for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smbase, i); + r = rsm_load_seg_64(ctxt, smstate, i); if (r != X86EMUL_CONTINUE) return r; } @@ -2564,12 +2558,19 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) static int em_rsm(struct x86_emulate_ctxt *ctxt) { unsigned long cr0, cr4, efer; + char buf[512]; u64 smbase; int ret; if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); + smbase = ctxt->ops->get_smbase(ctxt); + + ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); + if (ret != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + /* * Get back to real mode, to prepare a safe state in which to load * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU @@ -2605,20 +2606,18 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) efer = 0; ctxt->ops->set_msr(ctxt, MSR_EFER, efer); - smbase = ctxt->ops->get_smbase(ctxt); - /* * Give pre_leave_smm() a chance to make ISA-specific changes to the * vCPU state (e.g. enter guest mode) before loading state from the SMM * state-save area. */ - if (ctxt->ops->pre_leave_smm(ctxt, smbase)) + if (ctxt->ops->pre_leave_smm(ctxt, buf)) return X86EMUL_UNHANDLEABLE; if (emulator_has_longmode(ctxt)) - ret = rsm_load_state_64(ctxt, smbase + 0x8000); + ret = rsm_load_state_64(ctxt, buf); else - ret = rsm_load_state_32(ctxt, smbase + 0x8000); + ret = rsm_load_state_32(ctxt, buf); if (ret != X86EMUL_CONTINUE) { /* FIXME: should triple fault */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 675cecb3fa9c2..6b1cd73e4053d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6232,27 +6232,23 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *nested_vmcb; struct page *page; - struct { - u64 guest; - u64 vmcb; - } svm_state_save; + u64 guest; + u64 vmcb; int ret; - ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save, - sizeof(svm_state_save)); - if (ret) - return ret; + guest = GET_SMSTATE(u64, smstate, 0x7ed8); + vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); - if (svm_state_save.guest) { + if (guest) { vcpu->arch.hflags &= ~HF_SMM_MASK; - nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page); + nested_vmcb = nested_svm_map(svm, vmcb, &page); if (nested_vmcb) - enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page); + enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); else ret = 1; vcpu->arch.hflags |= HF_SMM_MASK; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5866e9e9f1e03..14ea25eadde8b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7398,7 +7398,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f05891b8df7c1..6ee1f9e5d3fb7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5963,9 +5963,10 @@ 
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_fla kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); } -static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, + const char *smstate) { - return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase); + return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate); } static const struct x86_emulate_ops emulate_ops = { -- GitLab From c5833c7a43a66bfe2f36439cb2f1281a588668af Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:03:10 -0700 Subject: [PATCH 1176/1861] KVM: x86: Open code kvm_set_hflags Prepare for clearing HF_SMM_MASK prior to loading state from the SMRAM save state map, i.e. kvm_smm_changed() needs to be called after state has been loaded and so cannot be done automatically when setting hflags from RSM. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/kvm/emulate.c | 3 +++ arch/x86/kvm/x86.c | 33 ++++++++++++++---------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index ec489c4328509..feab24cac610e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -228,6 +228,7 @@ struct x86_emulate_ops { void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate); + void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ae0d289b50fef..a6b2828532530 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2629,6 +2629,9 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); + + ctxt->ops->post_leave_smm(ctxt); + return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ee1f9e5d3fb7..472bbbbe153a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3530,7 +3530,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } -static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); +static void kvm_smm_changed(struct kvm_vcpu *vcpu); static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) @@ -3590,12 +3590,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - u32 hflags = vcpu->arch.hflags; - if (events->smi.smm) - hflags |= HF_SMM_MASK; - else - hflags &= ~HF_SMM_MASK; - kvm_set_hflags(vcpu, hflags); + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { + if (events->smi.smm) + vcpu->arch.hflags |= HF_SMM_MASK; + else + vcpu->arch.hflags &= ~HF_SMM_MASK; + kvm_smm_changed(vcpu); + } vcpu->arch.smi_pending = events->smi.pending; @@ -5960,7 +5961,7 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); + emul_to_vcpu(ctxt)->arch.hflags = emul_flags; } static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, @@ -5969,6 +5970,11 @@ static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, 
return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate); } +static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt) +{ + kvm_smm_changed(emul_to_vcpu(ctxt)); +} + static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, @@ -6009,6 +6015,7 @@ static const struct x86_emulate_ops emulate_ops = { .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, .pre_leave_smm = emulator_pre_leave_smm, + .post_leave_smm = emulator_post_leave_smm, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -6250,16 +6257,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } -static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags) -{ - unsigned changed = vcpu->arch.hflags ^ emul_flags; - - vcpu->arch.hflags = emul_flags; - - if (changed & HF_SMM_MASK) - kvm_smm_changed(vcpu); -} - static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, unsigned long *db) { -- GitLab From 9ec19493fb86d6d5fbf9286b94ff21e56ef66376 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:03:11 -0700 Subject: [PATCH 1177/1861] KVM: x86: clear SMM flags before loading state while leaving SMM RSM emulation is currently broken on VMX when the interrupted guest has CR4.VMXE=1. Stop dancing around the issue of HF_SMM_MASK being set when loading SMSTATE into architectural state, e.g. by toggling it for problematic flows, and simply clear HF_SMM_MASK prior to loading architectural state (from SMRAM save state area). Reported-by: Jon Doron Cc: Jim Mattson Cc: Liran Alon Cc: Vitaly Kuznetsov Fixes: 5bea5123cbf0 ("KVM: VMX: check nested state and CR4.VMXE against SMM") Signed-off-by: Sean Christopherson Tested-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 12 ++++++------ arch/x86/kvm/svm.c | 12 ++++-------- arch/x86/kvm/vmx/vmx.c | 2 -- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a6b2828532530..f526acee2eed6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2571,6 +2571,12 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (ret != X86EMUL_CONTINUE) return X86EMUL_UNHANDLEABLE; + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) + ctxt->ops->set_nmi_mask(ctxt, false); + + ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & + ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); + /* * Get back to real mode, to prepare a safe state in which to load * CR0/CR3/CR4/EFER. 
It's all a bit more complicated if the vCPU @@ -2624,12 +2630,6 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; } - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); - - ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & - ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); - ctxt->ops->post_leave_smm(ctxt); return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6b1cd73e4053d..406b558abfef7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6239,21 +6239,17 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) struct page *page; u64 guest; u64 vmcb; - int ret; guest = GET_SMSTATE(u64, smstate, 0x7ed8); vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); if (guest) { - vcpu->arch.hflags &= ~HF_SMM_MASK; nested_vmcb = nested_svm_map(svm, vmcb, &page); - if (nested_vmcb) - enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); - else - ret = 1; - vcpu->arch.hflags |= HF_SMM_MASK; + if (!nested_vmcb) + return 1; + enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); } - return ret; + return 0; } static int enable_smi_window(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 14ea25eadde8b..b4e7d645275a2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7409,9 +7409,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) } if (vmx->nested.smm.guest_mode) { - vcpu->arch.hflags &= ~HF_SMM_MASK; ret = nested_vmx_enter_non_root_mode(vcpu, false); - vcpu->arch.hflags |= HF_SMM_MASK; if (ret) return ret; -- GitLab From 8f4dc2e77cdfaf7e644ef29693fa229db29ee1de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:10:47 -0700 Subject: [PATCH 1178/1861] KVM: x86: Don't clear EFER during SMM transitions for 32-bit vCPU Neither AMD nor Intel CPUs have an EFER field in the legacy SMRAM save state area, i.e. don't save/restore EFER across SMM transitions. KVM somewhat models this, e.g. doesn't clear EFER on entry to SMM if the guest doesn't support long mode. But during RSM, KVM unconditionally clears EFER so that it can get back to pure 32-bit mode in order to start loading CRs with their actual non-SMM values. Clear EFER only when it will be written when loading the non-SMM state so as to preserve bits that can theoretically be set on 32-bit vCPUs, e.g. KVM always emulates EFER_SCE. And because CR4.PAE is cleared only to play nice with EFER, wrap that code in the long mode check as well. Note, this may result in a compiler warning about cr4 being consumed uninitialized. Re-read CR4 even though it's technically unnecessary, as doing so allows for more readable code and RSM emulation is not a performance critical path. Fixes: 660a5d517aaab ("KVM: x86: save/load state on SMM switch") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f526acee2eed6..f3284827c432d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2582,15 +2582,13 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU * supports long mode. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); if (emulator_has_longmode(ctxt)) { struct desc_struct cs_desc; /* Zero CR4.PCIDE before CR0.PG. 
*/ - if (cr4 & X86_CR4_PCIDE) { + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PCIDE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - cr4 &= ~X86_CR4_PCIDE; - } /* A 32-bit code segment is required to clear EFER.LMA. */ memset(&cs_desc, 0, sizeof(cs_desc)); @@ -2604,13 +2602,16 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + if (emulator_has_longmode(ctxt)) { + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PAE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + /* And finally go back to 32-bit mode. */ + efer = 0; + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + } /* * Give pre_leave_smm() a chance to make ISA-specific changes to the -- GitLab From b68f3cc7d978943fcf85148165b00594c38db776 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 2 Apr 2019 08:10:48 -0700 Subject: [PATCH 1179/1861] KVM: x86: Always use 32-bit SMRAM save state for 32-bit kernels Invoking the 64-bit variation on a 32-bit kernel will crash the guest, trigger a WARN, and/or lead to a buffer overrun in the host, e.g. rsm_load_state_64() writes r8-r15 unconditionally, but enum kvm_reg and thus x86_emulate_ctxt._regs only define r8-r15 for CONFIG_X86_64. KVM allows userspace to report long mode support via CPUID, even though the guest is all but guaranteed to crash if it actually tries to enable long mode. But, a pure 32-bit guest that is ignorant of long mode will happily plod along. SMM complicates things as 64-bit CPUs use a different SMRAM save state area. KVM handles this correctly for 64-bit kernels, e.g. uses the legacy save state map if userspace has hidden long mode from the guest, but doesn't fare well when userspace reports long mode support on a 32-bit host kernel (32-bit KVM doesn't support 64-bit guests). Since the alternative is to crash the guest, e.g. by not loading state or explicitly requesting shutdown, unconditionally use the legacy SMRAM save state map for 32-bit KVM. If a guest has managed to get far enough to handle SMIs when running under a weird/buggy userspace hypervisor, then don't deliberately crash the guest, since there are no downsides (from KVM's perspective) to allowing it to continue running.
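The overrun can be modeled in a few lines of standalone C (the types here are hypothetical; in KVM the register file is x86_emulate_ctxt._regs, sized by enum kvm_reg):

#include <stdint.h>

#ifdef __x86_64__
#define NR_EMUL_REGS 16	/* rax..r15 */
#else
#define NR_EMUL_REGS 8	/* eax..edi: no r8..r15 slots exist */
#endif

struct emul_ctxt {
	uint64_t regs[NR_EMUL_REGS];
};

/* Models rsm_load_state_64()'s unconditional 16-register loop: on a
 * 32-bit build this writes past the end of regs[] for i >= 8, which is
 * exactly the buffer overrun the patch avoids by never invoking the
 * 64-bit loader on 32-bit kernels. */
static void load_regs_64(struct emul_ctxt *ctxt, const uint64_t *save_area)
{
	int i;

	for (i = 0; i < 16; i++)
		ctxt->regs[i] = save_area[i];
}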
Fixes: 660a5d517aaab ("KVM: x86: save/load state on SMM switch") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 10 ++++++++++ arch/x86/kvm/x86.c | 10 ++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f3284827c432d..d0d5dd44b4f47 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2331,12 +2331,16 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) { +#ifdef CONFIG_X86_64 u32 eax, ebx, ecx, edx; eax = 0x80000001; ecx = 0; ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return edx & bit(X86_FEATURE_LM); +#else + return false; +#endif } static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) @@ -2372,6 +2376,7 @@ static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, return X86EMUL_CONTINUE; } +#ifdef CONFIG_X86_64 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, int n) { @@ -2391,6 +2396,7 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); return X86EMUL_CONTINUE; } +#endif static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, u64 cr0, u64 cr3, u64 cr4) @@ -2492,6 +2498,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } +#ifdef CONFIG_X86_64 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, const char *smstate) { @@ -2554,6 +2561,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } +#endif static int em_rsm(struct x86_emulate_ctxt *ctxt) { @@ -2621,9 +2629,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->pre_leave_smm(ctxt, buf)) return X86EMUL_UNHANDLEABLE; +#ifdef CONFIG_X86_64 if (emulator_has_longmode(ctxt)) ret = rsm_load_state_64(ctxt, buf); else +#endif ret = rsm_load_state_32(ctxt, buf); if (ret != X86EMUL_CONTINUE) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 472bbbbe153a3..f10fef561573c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7441,9 +7441,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); } +#ifdef CONFIG_X86_64 static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) { -#ifdef CONFIG_X86_64 struct desc_ptr dt; struct kvm_segment seg; unsigned long val; @@ -7493,10 +7493,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) for (i = 0; i < 6; i++) enter_smm_save_seg_64(vcpu, buf, i); -#else - WARN_ON_ONCE(1); -#endif } +#endif static void enter_smm(struct kvm_vcpu *vcpu) { @@ -7507,9 +7505,11 @@ static void enter_smm(struct kvm_vcpu *vcpu) trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); memset(buf, 0, 512); +#ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, buf); else +#endif enter_smm_save_state_32(vcpu, buf); /* @@ -7567,8 +7567,10 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); +#ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) kvm_x86_ops->set_efer(vcpu, 0); +#endif kvm_update_cpuid(vcpu); kvm_mmu_reset_context(vcpu); -- GitLab From c68c21ca929771a1f128d886359f9229d31cf80c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 
15:57:14 +0200 Subject: [PATCH 1180/1861] selftests: kvm/evmcs_test: complete I/O before migrating guest state Starting state migration after an IO exit without first completing IO may result in test failures. We already have two tests that need this (this patch in fact fixes evmcs_test, similar to what was fixed for state_test in commit 0f73bbc851ed, "KVM: selftests: complete IO before migrating guest state", 2019-03-13) and a third is coming. So, move the code to vcpu_save_state, and while at it do not access register state until after I/O is complete. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ .../testing/selftests/kvm/lib/x86_64/processor.c | 8 ++++++++ tools/testing/selftests/kvm/x86_64/evmcs_test.c | 5 +++-- tools/testing/selftests/kvm/x86_64/state_test.c | 15 +-------------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index efa0aad8b3c69..4ca96b228e46b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -91,6 +91,11 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) if (vm->kvm_fd < 0) exit(KSFT_SKIP); + if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { + fprintf(stderr, "immediate_exit not available, skipping test\n"); + exit(KSFT_SKIP); + } + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f28127f4a3af6..b363c9611bd64 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1030,6 +1030,14 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) nested_size, sizeof(state->nested_)); } + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. + */ + vcpu_run_complete_io(vm, vcpuid); + nmsrs = kvm_get_num_msrs(vm); list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); list->nmsrs = nmsrs; diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index c49c2a28b0eb2..36669684eca58 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -123,8 +123,6 @@ int main(int argc, char *argv[]) stage, run->exit_reason, exit_reason_str(run->exit_reason)); - memset(&regs1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, &regs1); switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], @@ -144,6 +142,9 @@ int main(int argc, char *argv[]) stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); + memset(&regs1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, &regs1); + kvm_vm_release(vm); /* Restore state in a new VM.
*/ diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 30f75856cf398..e0a3c0204b7cd 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -134,11 +134,6 @@ int main(int argc, char *argv[]) struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { - fprintf(stderr, "immediate_exit not available, skipping test\n"); - exit(KSFT_SKIP); - } - /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -179,18 +174,10 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", stage, (ulong)uc.args[1]); - /* - * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees - * guest state is consistent only after userspace re-enters the - * kernel with KVM_RUN. Complete IO prior to migrating state - * to a new VM. - */ - vcpu_run_complete_io(vm, VCPU_ID); - + state = vcpu_save_state(vm, VCPU_ID); memset(&regs1, 0, sizeof(regs1)); vcpu_regs_get(vm, VCPU_ID, &regs1); - state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); /* Restore state in a new VM. */ -- GitLab From c2390f16fc5b847a22f442a190d459beba07e86f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 15:51:19 +0200 Subject: [PATCH 1181/1861] selftests: kvm: fix for compilers that do not support -no-pie -no-pie was added to GCC at the same time as their configuration option --enable-default-pie. Compilers that were built before do not have -no-pie, but they also do not need it. Detect the option at build time. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7514fcea91a73..2d2e10b219d55 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,3 +1,5 @@ +include ../../../../scripts/Kbuild.include + all: top_srcdir = ../../../.. @@ -30,7 +32,11 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 10 Apr 2019 11:38:33 +0200 Subject: [PATCH 1182/1861] selftests: kvm: add a selftest for SMM Add a simple test for SMM, based on VMX. The test implements its own sync between the guest and the host, since using our ucall library seems too cumbersome here: the SMI handler runs in real-address mode. This patch also fixes KVM_SET_NESTED_STATE to happen after KVM_SET_VCPU_EVENTS; in fact, it places it last. This is because KVM needs to know whether the processor is in SMM or not.
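A condensed sketch of the restore ordering this enforces (error checking elided; the real sequence is vcpu_load_state() in the diff below, and the full set of ioctls in between is abbreviated here):

	ioctl(vcpu_fd, KVM_SET_XSAVE, &state->xsave);
	/* ... */
	ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &state->events);	/* SMM flag lands here */
	ioctl(vcpu_fd, KVM_SET_REGS, &state->regs);
	if (state->nested.size)	/* must come last: interpreted in light of SMM state */
		ioctl(vcpu_fd, KVM_SET_NESTED_STATE, &state->nested);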
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/processor.h | 27 +++ .../selftests/kvm/lib/x86_64/processor.c | 12 +- tools/testing/selftests/kvm/x86_64/smm_test.c | 157 ++++++++++++++++++ 4 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/smm_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2d2e10b219d55..f8588cca2bef4 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -19,6 +19,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e2884c2b81fff..6063d5b2f3561 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -778,6 +778,33 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 + #define MSR_IA32_TSCDEADLINE 0x000006e0 #define MSR_IA32_UCODE_WRITE 0x00000079 diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index b363c9611bd64..dc7fae9fa424c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1101,12 +1101,6 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - if (state->nested.size) { - r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); - } - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); @@ -1138,4 +1132,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", r); + + if (state->nested.size) { + r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", + r); + } } diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c 
b/tools/testing/selftests/kvm/x86_64/smm_test.c new file mode 100644 index 0000000000000..fb8086964d83b --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for SMM. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +#include "kvm_util.h" + +#include "vmx.h" + +#define VCPU_ID 1 + +#define PAGE_SIZE 4096 + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define STR(x) #x +#define XSTR(s) STR(s) + +#define SYNC_PORT 0xe +#define DONE 0xff + +/* + * This is compiled as normal 64-bit code; however, the SMI handler is executed + * in real-address mode. To stay simple we're limiting ourselves to a mode- + * independent subset of asm here. + * The SMI handler always reports back the fixed stage SMRAM_STAGE. + */ +uint8_t smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +void sync_with_host(uint64_t phase) +{ + asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" + : : "a" (phase)); +} + +void self_smi(void) +{ + wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4), + APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + + sync_with_host(1); + + wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE); + + sync_with_host(2); + + self_smi(); + + sync_with_host(4); + + if (vmx_pages) { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + + sync_with_host(5); + + self_smi(); + + sync_with_host(7); + } + + sync_with_host(DONE); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + int stage, stage_reported; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + run = vcpu_state(vm, VCPU_ID); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) + == SMRAM_GPA, "could not allocate guest physical addresses?"); + + memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, + sizeof(smi_handler)); + + vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA); + + if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + } else { + printf("will skip SMM test with VMX enabled\n"); + vcpu_args_set(vm, VCPU_ID, 1, 0); + } + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(&regs, 0, sizeof(regs)); + vcpu_regs_get(vm, VCPU_ID, &regs); + + stage_reported = regs.rax & 0xff; + + if (stage_reported == DONE) + goto done; + + TEST_ASSERT(stage_reported == stage || + stage_reported == SMRAM_STAGE, + "Unexpected stage: #%x, got %x", + stage, stage_reported); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + } + +done: + kvm_vm_free(vm); +} -- GitLab From be43c440eb5d0ccfdb0d67d5a4c9d579ff988b75 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 6 Apr 2019 15:06:58 +0530 Subject: [PATCH 1183/1861] KVM: x86: fix warning Using plain integer as NULL pointer Change the passed argument from 0 to NULL, which resolves the sparse warning below: arch/x86/kvm/x86.c:3096:61: warning: Using plain integer as NULL pointer Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f10fef561573c..a0d1fc80ac5a8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3095,7 +3095,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NESTED_STATE: r = kvm_x86_ops->get_nested_state ? - kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; + kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; default: break; -- GitLab From 1d487e9bf8ba66a7174c56a0029c54b1eca8f99c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Apr 2019 11:16:47 +0200 Subject: [PATCH 1184/1861] KVM: fix spectrev1 gadgets These were found with smatch, and then generalized when applicable. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 +++- include/linux/kvm_host.h | 10 ++++++---- virt/kvm/irqchip.c | 5 +++-- virt/kvm/kvm_main.c | 6 ++++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 991fdf7fc17fb..9bf70cf845648 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -138,6 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, if (offset <= max_apic_id) { u8 cluster_size = min(max_apic_id - offset + 1, 16U); + offset = array_index_nospec(offset, map->max_apic_id + 1); *cluster = &map->phys_map[offset]; *mask = dest_id & (0xffff >> (16 - cluster_size)); } else { @@ -901,7 +902,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (irq->dest_id > map->max_apic_id) { *bitmap = 0; } else { - *dst = &map->phys_map[irq->dest_id]; + u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); + *dst = &map->phys_map[dest_id]; *bitmap = 1; } return true; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d55c63db09b5..640a03642766b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -513,10 +514,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { - /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case - * the caller has read kvm->online_vcpus before (as is the case - * for kvm_for_each_vcpu, for example). - */
*/ smp_rmb(); return kvm->vcpus[i]; } @@ -600,6 +601,7 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { + as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 3547b0d8c91ea..79e59e4fa3dc6 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm, { struct kvm_kernel_irq_routing_entry *ei; int r; + u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES); /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and non-irqchip routing. */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[gsi], link) if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return -EINVAL; - e->gsi = ue->gsi; + e->gsi = gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 55fe8e20d8fd9..dc8edc97ba850 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2977,12 +2977,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_device_ops *ops = NULL; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int type; int ret; if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) return -ENODEV; - ops = kvm_device_ops_table[cd->type]; + type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); + ops = kvm_device_ops_table[type]; if (ops == NULL) return -ENODEV; @@ -2997,7 +2999,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, dev->kvm = kvm; mutex_lock(&kvm->lock); - ret = ops->create(dev, cd->type); + ret = ops->create(dev, type); if (ret < 0) { mutex_unlock(&kvm->lock); kfree(dev); -- GitLab From 7a223e06b1a411cef6c4cd7a9b9a33c8d225b10e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 27 Mar 2019 15:12:20 +0100 Subject: [PATCH 1185/1861] KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing In __apic_accept_irq() interface trig_mode is int and actually on some code paths it is set above u8: kvm_apic_set_irq() extracts it from 'struct kvm_lapic_irq' where trig_mode is u16. This is done on purpose as e.g. kvm_set_msi_irq() sets it to (1 << 15) & e->msi.data kvm_apic_local_deliver sets it to reg & (1 << 15). Fix the immediate issue by making 'tm' into u16. We may also want to adjust __apic_accept_irq() interface and use proper sizes for vector, level, trig_mode but this is not urgent. 
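The truncation is mechanical and easy to reproduce outside the kernel; a minimal sketch in plain C (illustrative names, not the KVM code itself):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t msi_data = 1u << 15;           /* level-triggered MSI */
        uint16_t tm16 = msi_data & (1u << 15);  /* 0x8000, bit survives */
        uint8_t  tm8  = msi_data & (1u << 15);  /* silently truncated to 0 */

        /* a u8 trace field misreports the irq as edge-triggered */
        printf("u16 tm=%#x, u8 tm=%#x\n", (unsigned)tm16, (unsigned)tm8);
        return 0;
    }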
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6432d08c7de79..4d47a2631d1fb 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) - __field( __u8, tm ) + __field( __u16, tm ) __field( __u8, vec ) ), -- GitLab From 8c2f870890fd28e023b0fcf49dcee333f2c8bad7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Apr 2019 15:25:00 +0200 Subject: [PATCH 1186/1861] ALSA: info: Fix racy addition/deletion of nodes The ALSA proc helper manages the child nodes in a linked list, but its addition and deletion is done without any lock. This leads to a corruption if they are operated concurrently. Usually this isn't a problem because the proc entries are added sequentially in the driver probe procedure itself. But the card registrations are done often asynchronously, and the crash could be actually reproduced with syzkaller. This patch papers over it by protecting the link addition and deletion with the parent's mutex. There is "access" mutex that is used for the file access, and this can be reused for this purpose as well. Reported-by: syzbot+48df349490c36f9f54ab@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/info.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/core/info.c b/sound/core/info.c index 96a074019c33c..0eb169acc8503 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -713,8 +713,11 @@ snd_info_create_entry(const char *name, struct snd_info_entry *parent, INIT_LIST_HEAD(&entry->list); entry->parent = parent; entry->module = module; - if (parent) + if (parent) { + mutex_lock(&parent->access); list_add_tail(&entry->list, &parent->children); + mutex_unlock(&parent->access); + } return entry; } @@ -792,7 +795,12 @@ void snd_info_free_entry(struct snd_info_entry * entry) list_for_each_entry_safe(p, n, &entry->children, list) snd_info_free_entry(p); - list_del(&entry->list); + p = entry->parent; + if (p) { + mutex_lock(&p->access); + list_del(&entry->list); + mutex_unlock(&p->access); + } kfree(entry->name); if (entry->private_free) entry->private_free(entry); -- GitLab From 14c9b31a925a9f5c647523a12e2b575b97c0aa47 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 8 Apr 2019 12:33:55 -0500 Subject: [PATCH 1187/1861] perf header: Fix lock/unlock imbalances when processing BPF/BTF info Fix lock/unlock imbalances by refactoring the code a bit and adding calls to up_write() before return. Signed-off-by: Gustavo A. R. 
Silva Acked-by: Song Liu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Addresses-Coverity-ID: 1444315 ("Missing unlock") Addresses-Coverity-ID: 1444316 ("Missing unlock") Fixes: a70a1123174a ("perf bpf: Save BTF information as headers to perf.data") Fixes: 606f972b1361 ("perf bpf: Save bpf_prog_info information as headers to perf.data") Link: http://lkml.kernel.org/r/20190408173355.GA10501@embeddedor [ Simplified the exit path to have just one up_write() + return ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b9e693825873a..2d2af2ac2b1e9 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2606,6 +2606,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) perf_env__insert_bpf_prog_info(env, info_node); } + up_write(&env->bpf_progs.lock); return 0; out: free(info_linear); @@ -2623,7 +2624,9 @@ static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data _ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) { struct perf_env *env = &ff->ph->env; + struct btf_node *node = NULL; u32 count, i; + int err = -1; if (ff->ph->needs_swap) { pr_warning("interpreting btf from systems with endianity is not yet supported\n"); @@ -2636,31 +2639,32 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) down_write(&env->bpf_progs.lock); for (i = 0; i < count; ++i) { - struct btf_node *node; u32 id, data_size; if (do_read_u32(ff, &id)) - return -1; + goto out; if (do_read_u32(ff, &data_size)) - return -1; + goto out; node = malloc(sizeof(struct btf_node) + data_size); if (!node) - return -1; + goto out; node->id = id; node->data_size = data_size; - if (__do_read(ff, node->data, data_size)) { - free(node); - return -1; - } + if (__do_read(ff, node->data, data_size)) + goto out; perf_env__insert_btf(env, node); + node = NULL; } + err = 0; +out: up_write(&env->bpf_progs.lock); - return 0; + free(node); + return err; } struct feature_ops { -- GitLab From 6e4b1cac30d297718218dc268199ed20df074b98 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Apr 2019 09:25:57 +0300 Subject: [PATCH 1188/1861] perf scripts python: export-to-sqlite.py: Fix use of parent_id in calls_view Fix following error using calls_view: Query failed: ambiguous column name: parent_id Unable to execute statement Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: 8ce9a7251d11 ("perf scripts python: export-to-sqlite.py: Export calls parent_id") Link: http://lkml.kernel.org/r/20190409062557.26138-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 3b71902a5a211..bf271fbc3a885 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -331,7 +331,7 @@ if perf_db_export_calls: 'return_id,' 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id,' - 'parent_id' + 'calls.parent_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') do_query(query, 'CREATE VIEW samples_view AS ' -- GitLab From 
8002a63f9ace7e9c958408f77f0a4dd4a8414511 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Apr 2019 12:01:56 +0200 Subject: [PATCH 1189/1861] perf stat: Disable DIR_FORMAT feature for 'perf stat record' Arnaldo reported assertion in perf stat record: assertion failed at util/header.c:875 There's no support for this in the 'perf state record' command, disable the feature for that case. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 258031c017c3 ("perf header: Add DIR_FORMAT feature to describe directory data") Link: http://lkml.kernel.org/r/20190409100156.20303-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 49ee3c2033ecb..c3625ec374e06 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1308,6 +1308,7 @@ static void init_features(struct perf_session *session) for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) perf_header__set_feat(&session->header, feat); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); perf_header__clear_feat(&session->header, HEADER_BUILD_ID); perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); -- GitLab From f32c2877bcb068a718bb70094cd59ccc29d4d082 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Tue, 9 Apr 2019 11:15:29 +0200 Subject: [PATCH 1190/1861] tools lib traceevent: Fix missing equality check for strcmp There was a missing comparison with 0 when checking if type is "s64" or "u64". Therefore, the body of the if-statement was entered if "type" was "u64" or not "s64", which made the first strcmp() redundant since if type is "u64", it's not "s64". If type is "s64", the body of the if-statement is not entered but since the remainder of the function consists of if-statements which will not be entered if type is "s64", we will just return "val", which is correct, albeit at the cost of a few more calls to strcmp(), i.e., it will behave just as if the if-statement was entered. If type is neither "s64" or "u64", the body of the if-statement will be entered incorrectly and "val" returned. This means that any type that is checked after "s64" and "u64" is handled the same way as "s64" and "u64", i.e., the limiting of "val" to fit in for example "s8" is never reached. This was introduced in the kernel tree when the sources were copied from trace-cmd in commit f7d82350e597 ("tools/events: Add files to create libtraceevent.a"), and in the trace-cmd repo in 1cdbae6035cei ("Implement typecasting in parser") when the function was introduced, i.e., it has always behaved the wrong way. Detected by cppcheck. 
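The bug class is worth spelling out: strcmp() returns 0 on equality, so its raw return value used as a boolean means "not equal". A standalone sketch of both the buggy and the fixed test:

    #include <stdio.h>
    #include <string.h>

    static void check(const char *type)
    {
        /* buggy: fires for every type except "s64" */
        if (strcmp(type, "u64") == 0 || strcmp(type, "s64"))
            printf("%-4s: taken (buggy test)\n", type);

        /* fixed: fires only for "u64" and "s64" */
        if (strcmp(type, "u64") == 0 || strcmp(type, "s64") == 0)
            printf("%-4s: taken (fixed test)\n", type);
    }

    int main(void)
    {
        check("u64");
        check("s64");
        check("s8");    /* the buggy test wrongly fires here */
        return 0;
    }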
Signed-off-by: Rikard Falkeborn Reviewed-by: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov Fixes: f7d82350e597 ("tools/events: Add files to create libtraceevent.a") Link: http://lkml.kernel.org/r/20190409091529.2686-1-rikard.falkeborn@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87494c7c619d8..981c6ce2da2c7 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2233,7 +2233,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff; if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val; if (strcmp(type, "s8") == 0) -- GitLab From 3a5b64f05d7fe36dea0dde26423e3044fbacd482 Mon Sep 17 00:00:00 2001 From: Mao Han Date: Wed, 10 Apr 2019 16:16:43 +0800 Subject: [PATCH 1191/1861] perf evsel: Use hweight64() instead of hweight_long(attr.sample_regs_user) On 32-bits platform with more than 32 registers, the 64 bits mask is truncate to the lower 32 bits and the return value of hweight_long will always smaller than 32. When kernel outputs more than 32 registers, but the user perf program only counts 32, there will be a data mismatch result to overflow check fail. Signed-off-by: Mao Han Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Fixes: 6a21c0b5c2ab ("perf tools: Add core support for sampling intr machine state regs") Fixes: d03f2170546d ("perf tools: Expand perf_event__synthesize_sample()") Fixes: 0f6a30150ca2 ("perf tools: Support user regs and stack in sample parsing") Link: http://lkml.kernel.org/r/29ad7947dc8fd1ff0abd2093a72cc27a2446be9f.1554883878.git.han_mao@c-sky.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 66d066f18b5b2..966360844fffb 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2368,7 +2368,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (data->user_regs.abi) { u64 mask = evsel->attr.sample_regs_user; - sz = hweight_long(mask) * sizeof(u64); + sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); data->user_regs.mask = mask; data->user_regs.regs = (u64 *)array; @@ -2424,7 +2424,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 mask = evsel->attr.sample_regs_intr; - sz = hweight_long(mask) * sizeof(u64); + sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); data->intr_regs.mask = mask; data->intr_regs.regs = (u64 *)array; @@ -2552,7 +2552,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -2580,7 +2580,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_INTR) { if (sample->intr_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); 
result += sz; } else { result += sizeof(u64); @@ -2710,7 +2710,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { *array++ = sample->user_regs.abi; - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs.mask) * sizeof(u64); memcpy(array, sample->user_regs.regs, sz); array = (void *)array + sz; } else { @@ -2746,7 +2746,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_INTR) { if (sample->intr_regs.abi) { *array++ = sample->intr_regs.abi; - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); memcpy(array, sample->intr_regs.regs, sz); array = (void *)array + sz; } else { -- GitLab From 6a3eb3360667170988f8a6477f6686242061488a Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:47:38 +0800 Subject: [PATCH 1192/1861] cifs: Fix use-after-free in SMB2_write There is a KASAN use-after-free: BUG: KASAN: use-after-free in SMB2_write+0x1342/0x1580 Read of size 8 at addr ffff8880b6a8e450 by task ln/4196 Should not release the 'req' because it will use in the trace. Fixes: eccb4422cf97 ("smb3: Add ftrace tracepoints for improved SMB3 debugging") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable 4.18+ Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 21ad01d55ab2d..5d1f8d2d44e43 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3769,7 +3769,6 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); rsp = (struct smb2_write_rsp *)rsp_iov.iov_base; if (rc) { @@ -3787,6 +3786,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->offset, *nbytes); } + cifs_small_buf_release(req); free_rsp_buf(resp_buftype, rsp); return rc; } -- GitLab From 088aaf17aa79300cab14dbee2569c58cfafd7d6e Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:47:39 +0800 Subject: [PATCH 1193/1861] cifs: Fix use-after-free in SMB2_read There is a KASAN use-after-free: BUG: KASAN: use-after-free in SMB2_read+0x1136/0x1190 Read of size 8 at addr ffff8880b4e45e50 by task ln/1009 Should not release the 'req' because it will use in the trace. 
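The fix pattern is general: a buffer must outlive its last reader, and tracepoints count as readers. A minimal userspace sketch of the ordering (hypothetical names, not the cifs code):

    #include <stdio.h>
    #include <stdlib.h>

    struct req { int tid; int sid; };

    static void trace_done(int tid, int sid)
    {
        printf("trace: tid=%d sid=%d\n", tid, sid);
    }

    static void handle(struct req *req)
    {
        /*
         * Buggy ordering would be:
         *     free(req);
         *     trace_done(req->tid, req->sid);   <- use after free
         */
        trace_done(req->tid, req->sid);  /* last read of req */
        free(req);                       /* release only afterwards */
    }

    int main(void)
    {
        struct req *r = malloc(sizeof(*r));
        r->tid = 1;
        r->sid = 2;
        handle(r);
        return 0;
    }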
Fixes: eccb4422cf97 ("smb3: Add ftrace tracepoints for improved SMB3 debugging") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable 4.18+ Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5d1f8d2d44e43..5d6adc63ad621 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3448,8 +3448,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, rqst.rq_nvec = 1; rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); - rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; if (rc) { @@ -3471,6 +3469,8 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, io_parms->tcon->tid, ses->Suid, io_parms->offset, io_parms->length); + cifs_small_buf_release(req); + *nbytes = le32_to_cpu(rsp->DataLength); if ((*nbytes > CIFS_MAX_MSGSIZE) || (*nbytes > io_parms->length)) { -- GitLab From b57a55e2200ede754e4dc9cce4ba9402544b9365 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 6 Apr 2019 15:30:38 +0800 Subject: [PATCH 1194/1861] cifs: Fix lease buffer length error There is a KASAN slab-out-of-bounds: BUG: KASAN: slab-out-of-bounds in _copy_from_iter_full+0x783/0xaa0 Read of size 80 at addr ffff88810c35e180 by task mount.cifs/539 CPU: 1 PID: 539 Comm: mount.cifs Not tainted 4.19 #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0xdd/0x12a print_address_description+0xa7/0x540 kasan_report+0x1ff/0x550 check_memory_region+0x2f1/0x310 memcpy+0x2f/0x80 _copy_from_iter_full+0x783/0xaa0 tcp_sendmsg_locked+0x1840/0x4140 tcp_sendmsg+0x37/0x60 inet_sendmsg+0x18c/0x490 sock_sendmsg+0xae/0x130 smb_send_kvec+0x29c/0x520 __smb_send_rqst+0x3ef/0xc60 smb_send_rqst+0x25a/0x2e0 compound_send_recv+0x9e8/0x2af0 cifs_send_recv+0x24/0x30 SMB2_open+0x35e/0x1620 open_shroot+0x27b/0x490 smb2_open_op_close+0x4e1/0x590 smb2_query_path_info+0x2ac/0x650 cifs_get_inode_info+0x1058/0x28f0 cifs_root_iget+0x3bb/0xf80 cifs_smb3_do_mount+0xe00/0x14c0 cifs_do_mount+0x15/0x20 mount_fs+0x5e/0x290 vfs_kern_mount+0x88/0x460 do_mount+0x398/0x31e0 ksys_mount+0xc6/0x150 __x64_sys_mount+0xea/0x190 do_syscall_64+0x122/0x590 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It can be reproduced by the following step: 1. samba configured with: server max protocol = SMB2_10 2. mount -o vers=default When parse the mount version parameter, the 'ops' and 'vals' was setted to smb30, if negotiate result is smb21, just update the 'ops' to smb21, but the 'vals' is still smb30. When add lease context, the iov_base is allocated with smb21 ops, but the iov_len is initiallited with the smb30. Because the iov_len is longer than iov_base, when send the message, copy array out of bounds. we need to keep the 'ops' and 'vals' consistent. 
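The failure mode is a plain descriptor mismatch; a toy sketch (illustrative names and sizes, not the real cifs structures):

    #include <stdio.h>

    struct dialect { const char *name; unsigned int lease_ctx_len; };

    static const struct dialect smb30 = { "3.0", 52 };  /* illustrative sizes */
    static const struct dialect smb21 = { "2.1", 36 };

    int main(void)
    {
        /* buggy: buffer sized per one dialect, length taken from another */
        const struct dialect *ops_side  = &smb21;  /* sizes iov_base */
        const struct dialect *vals_side = &smb30;  /* supplies iov_len */

        if (vals_side->lease_ctx_len > ops_side->lease_ctx_len)
            printf("iov_len %u > buffer %u: copy runs out of bounds\n",
                   vals_side->lease_ctx_len, ops_side->lease_ctx_len);
        return 0;
    }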
Fixes: 9764c02fcbad ("SMB3: Add support for multidialect negotiate (SMB2.1 and later)") Fixes: d5c7076b772a ("smb3: add smb3.1.1 to default dialect list") Signed-off-by: ZhangXiaoxu Signed-off-by: Steve French CC: Stable Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5d6adc63ad621..b8f7262ac3541 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -832,8 +832,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; - } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) + ses->server->vals = &smb21_values; + } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { ses->server->ops = &smb311_operations; + ses->server->vals = &smb311_values; + } } else if (le16_to_cpu(rsp->DialectRevision) != ses->server->vals->protocol_id) { /* if requested single dialect ensure returned dialect matched */ -- GitLab From e6d0fb7b34f264f72c33053558a360a6a734905e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 10 Apr 2019 07:47:22 +1000 Subject: [PATCH 1195/1861] cifs: fix handle leak in smb2_query_symlink() If we enter smb2_query_symlink() for something that is not a symlink and where the SMB2_open() would succeed we would never end up closing this handle and would thus leak a handle on the server. Fix this by immediately calling SMB2_close() on successfull open. Signed-off-by: Ronnie Sahlberg CC: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 00225e699d036..c36ff0d1fe2a8 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2389,6 +2389,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov, &resp_buftype); + if (!rc) + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc || !err_iov.iov_base) { rc = -ENOENT; goto free_path; -- GitLab From b98749cac4a695f084a5ff076f4510b23e353ecd Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Fri, 29 Mar 2019 10:49:12 +0100 Subject: [PATCH 1196/1861] CIFS: keep FileInfo handle live during oplock break In the oplock break handler, writing pending changes from pages puts the FileInfo handle. If the refcount reaches zero it closes the handle and waits for any oplock break handler to return, thus causing a deadlock. To prevent this situation: * We add a wait flag to cifsFileInfo_put() to decide whether we should wait for running/pending oplock break handlers * We keep an additionnal reference of the SMB FileInfo handle so that for the rest of the handler putting the handle won't close it. - The ref is bumped everytime we queue the handler via the cifs_queue_oplock_break() helper. - The ref is decremented at the end of the handler This bug was triggered by xfstest 464. 
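The pinning pattern described above is generic: take a reference before queueing asynchronous work on an object, and make dropping that reference the handler's last step. A userspace analogue (a sketch with hypothetical names, not the cifs code):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct file_info { atomic_int refs; };

    static void get_ref(struct file_info *f)
    {
        atomic_fetch_add(&f->refs, 1);
    }

    static void put_ref(struct file_info *f)
    {
        if (atomic_fetch_sub(&f->refs, 1) == 1)
            free(f);    /* last reference closes the handle */
    }

    /* queueing side: pin the object before handing it to async work */
    static void queue_break(struct file_info *f)
    {
        get_ref(f);
        /* queue_work(wq, ...) would go here */
    }

    /* handler side: the pinned ref keeps f alive; drop it last */
    static void break_handler(struct file_info *f)
    {
        /* ... write back dirty pages, release the oplock ... */
        put_ref(f);
    }

    int main(void)
    {
        struct file_info *f = calloc(1, sizeof(*f));
        atomic_init(&f->refs, 1);
        queue_break(f);
        break_handler(f);   /* drops the queue's pin */
        put_ref(f);         /* drops the original ref, frees f */
        return 0;
    }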
Also important fix to address the various reports of oops in smb2_push_mandatory_locks Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky CC: Stable --- fs/cifs/cifsglob.h | 2 ++ fs/cifs/file.c | 30 +++++++++++++++++++++++++----- fs/cifs/misc.c | 25 +++++++++++++++++++++++-- fs/cifs/smb2misc.c | 6 +++--- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5b18d45857409..585ad3207cb12 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1333,6 +1333,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) } struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); #define CIFS_CACHE_READ_FLG 1 @@ -1855,6 +1856,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock; #endif /* CONFIG_CIFS_ACL */ void cifs_oplock_break(struct work_struct *work); +void cifs_queue_oplock_break(struct cifsFileInfo *cfile); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 89006e044973e..9c0ccc06d172e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -360,12 +360,30 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file) return cifs_file; } -/* - * Release a reference on the file private data. This may involve closing - * the filehandle out on the server. Must be called without holding - * tcon->open_file_lock and cifs_file->file_info_lock. +/** + * cifsFileInfo_put - release a reference of file priv data + * + * Always potentially wait for oplock handler. See _cifsFileInfo_put(). */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) +{ + _cifsFileInfo_put(cifs_file, true); +} + +/** + * _cifsFileInfo_put - release a reference of file priv data + * + * This may involve closing the filehandle @cifs_file out on the + * server. Must be called without holding tcon->open_file_lock and + * cifs_file->file_info_lock. + * + * If @wait_for_oplock_handler is true and we are releasing the last + * reference, wait for any running oplock break handler of the file + * and cancel any pending one. If calling this function from the + * oplock break handler, you need to pass false. + * + */ +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) { struct inode *inode = d_inode(cifs_file->dentry); struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); @@ -414,7 +432,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) spin_unlock(&tcon->open_file_lock); - oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break); + oplock_break_cancelled = wait_oplock_handler ? 
+ cancel_work_sync(&cifs_file->oplock_break) : false; if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; @@ -4603,6 +4622,7 @@ void cifs_oplock_break(struct work_struct *work) cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } + _cifsFileInfo_put(cfile, false /* do not wait for ourself */); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bee203055b300..1e1626a2cfc39 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -501,8 +501,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &pCifsInode->flags); - queue_work(cifsoplockd_wq, - &netfile->oplock_break); + cifs_queue_oplock_break(netfile); netfile->oplock_break_cancelled = false; spin_unlock(&tcon->open_file_lock); @@ -607,6 +606,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode) spin_unlock(&cinode->writers_lock); } +/** + * cifs_queue_oplock_break - queue the oplock break handler for cfile + * + * This function is called from the demultiplex thread when it + * receives an oplock break for @cfile. + * + * Assumes the tcon->open_file_lock is held. + * Assumes cfile->file_info_lock is NOT held. + */ +void cifs_queue_oplock_break(struct cifsFileInfo *cfile) +{ + /* + * Bump the handle refcount now while we hold the + * open_file_lock to enforce the validity of it for the oplock + * break handler. The matching put is done at the end of the + * handler. + */ + cifsFileInfo_get(cfile); + + queue_work(cifsoplockd_wq, &cfile->oplock_break); +} + void cifs_done_oplock_break(struct cifsInodeInfo *cinode) { clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 0e3570e40ff8e..e311f58dc1c82 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -555,7 +555,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); - queue_work(cifsoplockd_wq, &cfile->oplock_break); + cifs_queue_oplock_break(cfile); kfree(lw); return true; } @@ -712,8 +712,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); spin_unlock(&cfile->file_info_lock); - queue_work(cifsoplockd_wq, - &cfile->oplock_break); + + cifs_queue_oplock_break(cfile); spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); -- GitLab From e2abb398115e9c33f3d1e25bf6d1d08badc58b13 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 16 Apr 2019 15:06:21 +0100 Subject: [PATCH 1197/1861] sched/fair: Remove unneeded prototype of capacity_of() The prototype of that function was already hoisted up in: commit 3b1baa6496e6 ("sched/fair: Add 'group_misfit_task' load-balance type") but that seems to have been missed. Get rid of the extra prototype. 
Signed-off-by: Valentin Schneider Acked-by: Quentin Perret Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: morten.rasmussen@arm.com Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point indicator") Link: http://lkml.kernel.org/r/20190416140621.19884-1-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ed7f5f8107b71..b6cc0703b8509 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5116,7 +5116,6 @@ static inline void hrtick_update(struct rq *rq) #ifdef CONFIG_SMP static inline unsigned long cpu_util(int cpu); -static unsigned long capacity_of(int cpu); static inline bool cpu_overutilized(int cpu) { -- GitLab From 2e8e19226398db8265a8e675fcc0118b9e80c9e8 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Tue, 19 Mar 2019 09:00:05 -0400 Subject: [PATCH 1198/1861] sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup With extremely short cfs_period_us setting on a parent task group with a large number of children the for loop in sched_cfs_period_timer() can run until the watchdog fires. There is no guarantee that the call to hrtimer_forward_now() will ever return 0. The large number of children can make do_sched_cfs_period_timer() take longer than the period. NMI watchdog: Watchdog detected hard LOCKUP on cpu 24 RIP: 0010:tg_nop+0x0/0x10 walk_tg_tree_from+0x29/0xb0 unthrottle_cfs_rq+0xe0/0x1a0 distribute_cfs_runtime+0xd3/0xf0 sched_cfs_period_timer+0xcb/0x160 ? sched_cfs_slack_timer+0xd0/0xd0 __hrtimer_run_queues+0xfb/0x270 hrtimer_interrupt+0x122/0x270 smp_apic_timer_interrupt+0x6a/0x140 apic_timer_interrupt+0xf/0x20 To prevent this we add protection to the loop that detects when the loop has run too many times and scales the period and quota up, proportionally, so that the timer can complete before then next period expires. This preserves the relative runtime quota while preventing the hard lockup. A warning is issued reporting this state and the new values. 
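The adjustment preserves the quota/period ratio while lengthening the period; the arithmetic in isolation (illustrative values):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t period = 100000;   /* ns, too short for the walk */
        uint64_t quota  = 50000;    /* ns, 50% of the period */

        uint64_t new_period = period * 147 / 128;        /* ~115% */
        uint64_t new_quota  = quota * new_period / period;

        /* quota/period stays ~50%, so the group's bandwidth is unchanged */
        printf("period %llu -> %llu ns, quota %llu -> %llu ns\n",
               (unsigned long long)period, (unsigned long long)new_period,
               (unsigned long long)quota, (unsigned long long)new_quota);
        return 0;
    }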
Signed-off-by: Phil Auld Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Anton Blanchard Cc: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190319130005.25492-1-pauld@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40bd1e27b1b79..a4d9e14bf1389 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4885,6 +4885,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +extern const u64 max_cfs_quota_period; + static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { struct cfs_bandwidth *cfs_b = @@ -4892,6 +4894,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) unsigned long flags; int overrun; int idle = 0; + int count = 0; raw_spin_lock_irqsave(&cfs_b->lock, flags); for (;;) { @@ -4899,6 +4902,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (!overrun) break; + if (++count > 3) { + u64 new, old = ktime_to_ns(cfs_b->period); + + new = (old * 147) / 128; /* ~115% */ + new = min(new, max_cfs_quota_period); + + cfs_b->period = ns_to_ktime(new); + + /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ + cfs_b->quota *= new; + cfs_b->quota = div64_u64(cfs_b->quota, old); + + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + + /* reset count so we don't come right back in here */ + count = 0; + } + idle = do_sched_cfs_period_timer(cfs_b, overrun, flags); } if (idle) -- GitLab From 1b02cd6a2d7f3e2a6a5262887d2cb2912083e42f Mon Sep 17 00:00:00 2001 From: luca abeni Date: Mon, 25 Mar 2019 14:15:30 +0100 Subject: [PATCH 1199/1861] sched/deadline: Correctly handle active 0-lag timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot reported the following warning: [ ] WARNING: CPU: 4 PID: 17089 at kernel/sched/deadline.c:255 task_non_contending+0xae0/0x1950 line 255 of deadline.c is: WARN_ON(hrtimer_active(&dl_se->inactive_timer)); in task_non_contending(). Unfortunately, in some cases (for example, a deadline task continuosly blocking and waking immediately) it can happen that a task blocks (and task_non_contending() is called) while the 0-lag timer is still active. In this case, the safest thing to do is to immediately decrease the running bandwidth of the task, without trying to re-arm the 0-lag timer. 
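For reference, the 0-lag time is the instant at which the task's lag reaches zero if it keeps consuming at its reserved bandwidth. A standalone sketch of the computation and the fixed guard (illustrative values, and the formula is an assumption based on the usual CBS form, not copied from deadline.c):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* illustrative values, in ns, relative to "now" */
        int64_t deadline   = 600000;    /* current deadline minus now */
        int64_t runtime    = 300000;    /* remaining runtime budget */
        int64_t dl_runtime = 500000;    /* reserved runtime per period */
        int64_t dl_period  = 1000000;   /* reservation period */
        bool timer_active  = true;      /* previous 0-lag timer still queued */

        /* assumed CBS form: lag hits zero once the remaining runtime,
         * consumed at bandwidth dl_runtime/dl_period, is exhausted */
        int64_t zerolag = deadline - runtime * dl_period / dl_runtime;

        /* fixed guard: reclaim bandwidth now instead of re-arming */
        if (zerolag < 0 || timer_active)
            printf("decrease running bandwidth immediately\n");
        else
            printf("arm the 0-lag timer %lld ns from now\n",
                   (long long)zerolag);
        return 0;
    }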
Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: chengjian (D) Link: https://lkml.kernel.org/r/20190325131530.34706-1-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6a73e41a20160..43901fa3f2693 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p) if (dl_entity_is_special(dl_se)) return; - WARN_ON(hrtimer_active(&dl_se->inactive_timer)); WARN_ON(dl_se->dl_non_contending); zerolag_time = dl_se->deadline - @@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p) * If the "0-lag time" already passed, decrease the active * utilization now, instead of starting a timer */ - if (zerolag_time < 0) { + if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) { if (dl_task(p)) sub_running_bw(dl_se, dl_rq); if (!dl_task(p) || p->state == TASK_DEAD) { -- GitLab From 6d25be5782e482eb93e3de0c94d0a517879377d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Mar 2019 17:55:48 +0100 Subject: [PATCH 1200/1861] sched/core, workqueues: Distangle worker accounting from rq lock The worker accounting for CPU bound workers is plugged into the core scheduler code and the wakeup code. This is not a hard requirement and can be avoided by keeping track of the state in the workqueue code itself. Keep track of the sleeping state in the worker itself and call the notifier before entering the core scheduler. There might be false positives when the task is woken between that call and actually scheduling, but that's not really different from scheduling and being woken immediately after switching away. When nr_running is updated when the task is retunrning from schedule() then it is later compared when it is done from ttwu(). [ bigeasy: preempt_disable() around wq_worker_sleeping() by Daniel Bristot de Oliveira ] Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Daniel Bristot de Oliveira Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/ad2b29b5715f970bffc1a7026cabd6ff0b24076a.1532952814.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 88 +++++++++---------------------------- kernel/workqueue.c | 54 ++++++++++------------- kernel/workqueue_internal.h | 5 ++- 3 files changed, 48 insertions(+), 99 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4778c48a7fda4..6184a0856aab7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1685,10 +1685,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl { activate_task(rq, p, en_flags); p->on_rq = TASK_ON_RQ_QUEUED; - - /* If a worker is waking up, notify the workqueue: */ - if (p->flags & PF_WQ_WORKER) - wq_worker_waking_up(p, cpu_of(rq)); } /* @@ -2106,56 +2102,6 @@ out: return success; } -/** - * try_to_wake_up_local - try to wake up a local task with rq lock held - * @p: the thread to be awakened - * @rf: request-queue flags for pinning - * - * Put @p on the run-queue if it's not already there. The caller must - * ensure that this_rq() is locked, @p is bound to this_rq() and not - * the current task. 
- */ -static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) -{ - struct rq *rq = task_rq(p); - - if (WARN_ON_ONCE(rq != this_rq()) || - WARN_ON_ONCE(p == current)) - return; - - lockdep_assert_held(&rq->lock); - - if (!raw_spin_trylock(&p->pi_lock)) { - /* - * This is OK, because current is on_cpu, which avoids it being - * picked for load-balance and preemption/IRQs are still - * disabled avoiding further scheduler activity on it and we've - * not yet picked a replacement task. - */ - rq_unlock(rq, rf); - raw_spin_lock(&p->pi_lock); - rq_relock(rq, rf); - } - - if (!(p->state & TASK_NORMAL)) - goto out; - - trace_sched_waking(p); - - if (!task_on_rq_queued(p)) { - if (p->in_iowait) { - delayacct_blkio_end(p); - atomic_dec(&rq->nr_iowait); - } - ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); - } - - ttwu_do_wakeup(rq, p, 0, rf); - ttwu_stat(p, smp_processor_id(), 0); -out: - raw_spin_unlock(&p->pi_lock); -} - /** * wake_up_process - Wake up a specific process * @p: The process to be woken up. @@ -3472,19 +3418,6 @@ static void __sched notrace __schedule(bool preempt) atomic_inc(&rq->nr_iowait); delayacct_blkio_start(); } - - /* - * If a worker went to sleep, notify and ask workqueue - * whether it wants to wake up a task to maintain - * concurrency. - */ - if (prev->flags & PF_WQ_WORKER) { - struct task_struct *to_wakeup; - - to_wakeup = wq_worker_sleeping(prev); - if (to_wakeup) - try_to_wake_up_local(to_wakeup, &rf); - } } switch_count = &prev->nvcsw; } @@ -3544,6 +3477,20 @@ static inline void sched_submit_work(struct task_struct *tsk) { if (!tsk->state || tsk_is_pi_blocked(tsk)) return; + + /* + * If a worker went to sleep, notify and ask workqueue whether + * it wants to wake up a task to maintain concurrency. + * As this function is called inside the schedule() context, + * we disable preemption to avoid it calling schedule() again + * in the possible wakeup of a kworker. + */ + if (tsk->flags & PF_WQ_WORKER) { + preempt_disable(); + wq_worker_sleeping(tsk); + preempt_enable_no_resched(); + } + /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. @@ -3552,6 +3499,12 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } +static void sched_update_worker(struct task_struct *tsk) +{ + if (tsk->flags & PF_WQ_WORKER) + wq_worker_running(tsk); +} + asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; @@ -3562,6 +3515,7 @@ asmlinkage __visible void __sched schedule(void) __schedule(false); sched_preempt_enable_no_resched(); } while (need_resched()); + sched_update_worker(tsk); } EXPORT_SYMBOL(schedule); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ddee541ea97aa..56180c9286f50 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -841,43 +841,32 @@ static void wake_up_worker(struct worker_pool *pool) } /** - * wq_worker_waking_up - a worker is waking up + * wq_worker_running - a worker is running again * @task: task waking up - * @cpu: CPU @task is waking up to * - * This function is called during try_to_wake_up() when a worker is - * being awoken. 
- * - * CONTEXT: - * spin_lock_irq(rq->lock) + * This function is called when a worker returns from schedule() */ -void wq_worker_waking_up(struct task_struct *task, int cpu) +void wq_worker_running(struct task_struct *task) { struct worker *worker = kthread_data(task); - if (!(worker->flags & WORKER_NOT_RUNNING)) { - WARN_ON_ONCE(worker->pool->cpu != cpu); + if (!worker->sleeping) + return; + if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(&worker->pool->nr_running); - } + worker->sleeping = 0; } /** * wq_worker_sleeping - a worker is going to sleep * @task: task going to sleep * - * This function is called during schedule() when a busy worker is - * going to sleep. Worker on the same cpu can be woken up by - * returning pointer to its task. - * - * CONTEXT: - * spin_lock_irq(rq->lock) - * - * Return: - * Worker task on @cpu to wake up, %NULL if none. + * This function is called from schedule() when a busy worker is + * going to sleep. */ -struct task_struct *wq_worker_sleeping(struct task_struct *task) +void wq_worker_sleeping(struct task_struct *task) { - struct worker *worker = kthread_data(task), *to_wakeup = NULL; + struct worker *next, *worker = kthread_data(task); struct worker_pool *pool; /* @@ -886,13 +875,15 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) * checking NOT_RUNNING. */ if (worker->flags & WORKER_NOT_RUNNING) - return NULL; + return; pool = worker->pool; - /* this can only happen on the local cpu */ - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id())) - return NULL; + if (WARN_ON_ONCE(worker->sleeping)) + return; + + worker->sleeping = 1; + spin_lock_irq(&pool->lock); /* * The counterpart of the following dec_and_test, implied mb, @@ -906,9 +897,12 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) * lock is safe. */ if (atomic_dec_and_test(&pool->nr_running) && - !list_empty(&pool->worklist)) - to_wakeup = first_idle_worker(pool); - return to_wakeup ? to_wakeup->task : NULL; + !list_empty(&pool->worklist)) { + next = first_idle_worker(pool); + if (next) + wake_up_process(next->task); + } + spin_unlock_irq(&pool->lock); } /** @@ -4929,7 +4923,7 @@ static void rebind_workers(struct worker_pool *pool) * * WRITE_ONCE() is necessary because @worker->flags may be * tested without holding any lock in - * wq_worker_waking_up(). Without it, NOT_RUNNING test may + * wq_worker_running(). Without it, NOT_RUNNING test may * fail incorrectly leading to premature concurrency * management operations. */ diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index cb68b03ca89aa..498de0e909a43 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -44,6 +44,7 @@ struct worker { unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ int id; /* I: worker id */ + int sleeping; /* None */ /* * Opaque string set with work_set_desc(). Printed out with task @@ -72,8 +73,8 @@ static inline struct worker *current_wq_worker(void) * Scheduler hooks for concurrency managed workqueue. Only to be used from * sched/ and workqueue.c. 
*/ -void wq_worker_waking_up(struct task_struct *task, int cpu); -struct task_struct *wq_worker_sleeping(struct task_struct *task); +void wq_worker_running(struct task_struct *task); +void wq_worker_sleeping(struct task_struct *task); work_func_t wq_worker_last_func(struct task_struct *task); #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ -- GitLab From 1b174a2cb67a3a156d5a28426ae14241e6dfa655 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Apr 2019 09:53:13 +0200 Subject: [PATCH 1201/1861] sched/core: Remove ttwu_activate() After the removal of try_to_wake_up_local(), there is only one user of ttwu_activate() left, and since it is a trivial function, remove it. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6184a0856aab7..3feb83df322ed 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1681,12 +1681,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) __schedstat_inc(p->se.statistics.nr_wakeups_sync); } -static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) -{ - activate_task(rq, p, en_flags); - p->on_rq = TASK_ON_RQ_QUEUED; -} - /* * Mark the task runnable and perform wakeup-preemption. */ @@ -1738,7 +1732,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, en_flags |= ENQUEUE_MIGRATED; #endif - ttwu_activate(rq, p, en_flags); + activate_task(rq, p, en_flags); + p->on_rq = TASK_ON_RQ_QUEUED; ttwu_do_wakeup(rq, p, wake_flags, rf); } -- GitLab From 7dd7788411646c9619aa6495f832bc0a9b0146b5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Apr 2019 09:59:05 +0200 Subject: [PATCH 1202/1861] sched/core: Unify p->on_rq updates Almost all {,de}activate_task() invocations pair with p->on_rq updates, the exception being the usage in rt/deadline which hold both rq locks and therefore don't strictly need to set TASK_ON_RQ_MIGRATING, but it is harmless if we do anyway. Put the updates in {,de}activate_task() and cut down on repetition. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 9 ++++----- kernel/sched/fair.c | 2 -- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3feb83df322ed..f4838b78b9f90 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -792,10 +792,14 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags) rq->nr_uninterruptible--; enqueue_task(rq, p, flags); + + p->on_rq = TASK_ON_RQ_QUEUED; } void deactivate_task(struct rq *rq, struct task_struct *p, int flags) { + p->on_rq = (flags & DEQUEUE_SLEEP) ? 
0 : TASK_ON_RQ_MIGRATING; + if (task_contributes_to_load(p)) rq->nr_uninterruptible++; @@ -1237,11 +1241,9 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) rq_pin_lock(src_rq, &srf); rq_pin_lock(dst_rq, &drf); - p->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(src_rq, p, 0); set_task_cpu(p, cpu); activate_task(dst_rq, p, 0); - p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(dst_rq, p, 0); rq_unpin_lock(dst_rq, &drf); @@ -1733,7 +1735,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, #endif activate_task(rq, p, en_flags); - p->on_rq = TASK_ON_RQ_QUEUED; ttwu_do_wakeup(rq, p, wake_flags, rf); } @@ -2408,7 +2409,6 @@ void wake_up_new_task(struct task_struct *p) post_init_entity_util_avg(p); activate_task(rq, p, ENQUEUE_NOCLOCK); - p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); #ifdef CONFIG_SMP @@ -3407,7 +3407,6 @@ static void __sched notrace __schedule(bool preempt) prev->state = TASK_RUNNING; } else { deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); - prev->on_rq = 0; if (prev->in_iowait) { atomic_inc(&rq->nr_iowait); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b6cc0703b8509..e5b100b6ba4ea 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7491,7 +7491,6 @@ static void detach_task(struct task_struct *p, struct lb_env *env) { lockdep_assert_held(&env->src_rq->lock); - p->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, env->dst_cpu); } @@ -7627,7 +7626,6 @@ static void attach_task(struct rq *rq, struct task_struct *p) BUG_ON(task_rq(p) != rq); activate_task(rq, p, ENQUEUE_NOCLOCK); - p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(rq, p, 0); } -- GitLab From 2a3f7221acddfe1caa9ff09b3a8158c39b2fdeac Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Apr 2019 17:06:33 +0200 Subject: [PATCH 1203/1861] ALSA: core: Fix card races between register and disconnect There is a small race window in the card disconnection code that allows the registration of another card with the very same card id. This leads to a warning in procfs creation as caught by syzkaller. The problem is that we delete snd_cards and snd_cards_lock entries at the very beginning of the disconnection procedure. This makes the slot available to be assigned for another card object while the disconnection procedure is being processed. Then it becomes possible to issue a procfs registration with the existing file name although we check the conflict beforehand. The fix is simply to move the snd_cards and snd_cards_lock clearances at the end of the disconnection procedure. The references to these entries are merely either from the global proc files like /proc/asound/cards or from the card registration / disconnection, so it should be fine to shift at the very end. 
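The ordering rule generalizes: an identifier slot can be reclaimed by a newcomer the moment it is released, so release it only after every path that still resolves the old identity is finished. A toy sketch (not the ALSA code):

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_CARDS 32

    static bool slot_used[MAX_CARDS];

    static void disconnect_card(int n)
    {
        /*
         * Buggy order: clearing slot_used[n] here would let a new card
         * claim slot n, and its procfs name, while the old card's files
         * are still being torn down.
         */

        /* ... replace fops, notify devices, delete the device ... */

        slot_used[n] = false;   /* release the id last */
    }

    int main(void)
    {
        slot_used[7] = true;
        disconnect_card(7);
        printf("slot 7 free again: %d\n", !slot_used[7]);
        return 0;
    }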
Reported-by: syzbot+48df349490c36f9f54ab@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/core/init.c b/sound/core/init.c index 0c4dc40376a70..079c12d64b0e3 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -382,14 +382,7 @@ int snd_card_disconnect(struct snd_card *card) card->shutdown = 1; spin_unlock(&card->files_lock); - /* phase 1: disable fops (user space) operations for ALSA API */ - mutex_lock(&snd_card_mutex); - snd_cards[card->number] = NULL; - clear_bit(card->number, snd_cards_lock); - mutex_unlock(&snd_card_mutex); - - /* phase 2: replace file->f_op with special dummy operations */ - + /* replace file->f_op with special dummy operations */ spin_lock(&card->files_lock); list_for_each_entry(mfile, &card->files_list, list) { /* it's critical part, use endless loop */ @@ -405,7 +398,7 @@ int snd_card_disconnect(struct snd_card *card) } spin_unlock(&card->files_lock); - /* phase 3: notify all connected devices about disconnection */ + /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ #if IS_ENABLED(CONFIG_SND_MIXER_OSS) @@ -421,6 +414,13 @@ int snd_card_disconnect(struct snd_card *card) device_del(&card->card_dev); card->registered = false; } + + /* disable fops (user space) operations for ALSA API */ + mutex_lock(&snd_card_mutex); + snd_cards[card->number] = NULL; + clear_bit(card->number, snd_cards_lock); + mutex_unlock(&snd_card_mutex); + #ifdef CONFIG_PM wake_up(&card->power_sleep); #endif -- GitLab From aaba098fe6ce594ae6f963dc041be6307e499f19 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:40 +0100 Subject: [PATCH 1204/1861] arm64: HWCAP: add support for AT_HWCAP2 As we will exhaust the first 32 bits of AT_HWCAP let's start exposing AT_HWCAP2 to userspace to give us up to 64 caps. Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we prefer to expand into AT_HWCAP2 in order to provide a consistent view to userspace between ILP32 and LP64. However internal to the kernel we prefer to continue to use the full space of elf_hwcap. To reduce complexity and allow for future expansion, we now represent hwcaps in the kernel as ordinals and use a KERNEL_HWCAP_ prefix. This allows us to support automatic feature based module loading for all our hwcaps. We introduce cpu_set_feature to set hwcaps which complements the existing cpu_have_feature helper. These helpers allow us to clean up existing direct uses of elf_hwcap and reduce any future effort required to move beyond 64 caps. For convenience we also introduce cpu_{have,set}_named_feature which makes use of the cpu_feature macro to allow providing a hwcap name without a {KERNEL_}HWCAP_ prefix. 
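The scheme can be pictured in isolation: caps live as ordinals 0..63 in one kernel-internal 64-bit word, whose low and high halves become what userspace reads as AT_HWCAP and AT_HWCAP2. A sketch (not the kernel code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t elf_hwcap = 0;

        /* setting ordinal n is just bit n in the 64-bit space */
        elf_hwcap |= UINT64_C(1) << 3;   /* lands in AT_HWCAP bit 3 */
        elf_hwcap |= UINT64_C(1) << 37;  /* lands in AT_HWCAP2 bit 5 */

        uint32_t hwcap  = elf_hwcap & 0xffffffff;  /* AT_HWCAP view */
        uint32_t hwcap2 = elf_hwcap >> 32;         /* AT_HWCAP2 view */

        printf("AT_HWCAP=%#x AT_HWCAP2=%#x\n", hwcap, hwcap2);
        return 0;
    }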
Signed-off-by: Andrew Murray [will: use const_ilog2() and tweak documentation] Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.txt | 13 +++-- arch/arm64/crypto/aes-ce-ccm-glue.c | 2 +- arch/arm64/crypto/aes-neonbs-glue.c | 2 +- arch/arm64/crypto/chacha-neon-glue.c | 2 +- arch/arm64/crypto/crct10dif-ce-glue.c | 4 +- arch/arm64/crypto/ghash-ce-glue.c | 8 +-- arch/arm64/crypto/nhpoly1305-neon-glue.c | 2 +- arch/arm64/crypto/sha256-glue.c | 4 +- arch/arm64/include/asm/cpufeature.h | 22 ++++---- arch/arm64/include/asm/hwcap.h | 52 ++++++++++++++++++- arch/arm64/include/uapi/asm/hwcap.h | 2 +- arch/arm64/kernel/cpufeature.c | 66 ++++++++++++------------ arch/arm64/kernel/cpuinfo.c | 2 +- arch/arm64/kernel/fpsimd.c | 4 +- drivers/clocksource/arm_arch_timer.c | 8 +++ 15 files changed, 130 insertions(+), 63 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 13d6691b37bee..11805777031e7 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -13,9 +13,9 @@ architected discovery mechanism available to userspace code at EL0. The kernel exposes the presence of these features to userspace through a set of flags called hwcaps, exposed in the auxilliary vector. -Userspace software can test for features by acquiring the AT_HWCAP entry -of the auxilliary vector, and testing whether the relevant flags are -set, e.g. +Userspace software can test for features by acquiring the AT_HWCAP or +AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant +flags are set, e.g. bool floating_point_is_present(void) { @@ -194,3 +194,10 @@ HWCAP_PACG Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or ID_AA64ISAR1_EL1.GPI == 0b0001, as described by Documentation/arm64/pointer-authentication.txt. + + +4. Unused AT_HWCAP bits +----------------------- + +For interoperation with userspace, the kernel guarantees that bits 62 +and 63 of AT_HWCAP will always be returned as 0. 
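The documented userspace test pattern extends naturally to the second word; something like the following (assuming glibc's getauxval() and the arm64 uapi hwcap header):

    #include <sys/auxv.h>
    #include <asm/hwcap.h>   /* arm64 uapi HWCAP_* constants */
    #include <stdio.h>

    int main(void)
    {
        unsigned long caps  = getauxval(AT_HWCAP);
        unsigned long caps2 = getauxval(AT_HWCAP2);  /* 0 if absent */

        printf("FP: %s\n", (caps & HWCAP_FP) ? "present" : "absent");
        printf("AT_HWCAP2 = %#lx\n", caps2);
        return 0;
    }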
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 5fc6f51908fdd..036ea77f83bc5 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -372,7 +372,7 @@ static struct aead_alg ccm_aes_alg = { static int __init aes_mod_init(void) { - if (!(elf_hwcap & HWCAP_AES)) + if (!cpu_have_named_feature(AES)) return -ENODEV; return crypto_register_aead(&ccm_aes_alg); } diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e7a95a566462f..bf1b321ff4c1f 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -440,7 +440,7 @@ static int __init aes_init(void) int err; int i; - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index bece1d85bd816..cb054f51c9178 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -173,7 +173,7 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index dd325829ee44f..e81d5bd555c09 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -101,7 +101,7 @@ static struct shash_alg crc_t10dif_alg[] = {{ static int __init crc_t10dif_mod_init(void) { - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) return crypto_register_shashes(crc_t10dif_alg, ARRAY_SIZE(crc_t10dif_alg)); else @@ -111,7 +111,7 @@ static int __init crc_t10dif_mod_init(void) static void __exit crc_t10dif_mod_exit(void) { - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) crypto_unregister_shashes(crc_t10dif_alg, ARRAY_SIZE(crc_t10dif_alg)); else diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 791ad422c427d..4e69bb78ea89f 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -704,10 +704,10 @@ static int __init ghash_ce_mod_init(void) { int ret; - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) ret = crypto_register_shashes(ghash_alg, ARRAY_SIZE(ghash_alg)); else @@ -717,7 +717,7 @@ static int __init ghash_ce_mod_init(void) if (ret) return ret; - if (elf_hwcap & HWCAP_PMULL) { + if (cpu_have_named_feature(PMULL)) { ret = crypto_register_aead(&gcm_aes_alg); if (ret) crypto_unregister_shashes(ghash_alg, @@ -728,7 +728,7 @@ static int __init ghash_ce_mod_init(void) static void __exit ghash_ce_mod_exit(void) { - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg)); else crypto_unregister_shash(ghash_alg); diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index 22cc32ac9448d..38a589044b6cc 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -56,7 +56,7 @@ static struct shash_alg nhpoly1305_alg = { static int __init nhpoly1305_mod_init(void) { - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; return 
crypto_register_shash(&nhpoly1305_alg); diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 4aedeaefd61f3..0cccdb9cc2c01 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -173,7 +173,7 @@ static int __init sha256_mod_init(void) if (ret) return ret; - if (elf_hwcap & HWCAP_ASIMD) { + if (cpu_have_named_feature(ASIMD)) { ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs)); if (ret) crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); @@ -183,7 +183,7 @@ static int __init sha256_mod_init(void) static void __exit sha256_mod_fini(void) { - if (elf_hwcap & HWCAP_ASIMD) + if (cpu_have_named_feature(ASIMD)) crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs)); crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e505e1fbd2b93..347c170466686 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -14,15 +14,8 @@ #include #include -/* - * In the arm64 world (as in the ARM world), elf_hwcap is used both internally - * in the kernel and for user space to keep track of which optional features - * are supported by the current system. So let's map feature 'x' to HWCAP_x. - * Note that HWCAP_x constants are bit fields so we need to take the log. - */ - -#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) -#define cpu_feature(x) ilog2(HWCAP_ ## x) +#define MAX_CPU_FEATURES 64 +#define cpu_feature(x) KERNEL_HWCAP_ ## x #ifndef __ASSEMBLY__ @@ -400,10 +393,19 @@ extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); bool this_cpu_has_cap(unsigned int cap); +static inline void cpu_set_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + elf_hwcap |= BIT(num); +} +#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) + static inline bool cpu_have_feature(unsigned int num) { - return elf_hwcap & (1UL << num); + WARN_ON(num >= MAX_CPU_FEATURES); + return elf_hwcap & BIT(num); } +#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) /* System capability check for constant caps */ static inline bool __cpus_have_const_cap(int num) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 400b80b49595d..af868cbe96f83 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -40,11 +40,61 @@ #define COMPAT_HWCAP2_CRC32 (1 << 4) #ifndef __ASSEMBLY__ +#include +#include + +/* + * For userspace we represent hwcaps as a collection of HWCAP{,2}_x bitfields + * as described in uapi/asm/hwcap.h. For the kernel we represent hwcaps as + * natural numbers (in a single range of size MAX_CPU_FEATURES) defined here + * with prefix KERNEL_HWCAP_ mapped to their HWCAP{,2}_x counterpart. + * + * Hwcaps should be set and tested within the kernel via the + * cpu_{set,have}_named_feature(feature) where feature is the unique suffix + * of KERNEL_HWCAP_{feature}. 
+ */ +#define __khwcap_feature(x) const_ilog2(HWCAP_ ## x) +#define KERNEL_HWCAP_FP __khwcap_feature(FP) +#define KERNEL_HWCAP_ASIMD __khwcap_feature(ASIMD) +#define KERNEL_HWCAP_EVTSTRM __khwcap_feature(EVTSTRM) +#define KERNEL_HWCAP_AES __khwcap_feature(AES) +#define KERNEL_HWCAP_PMULL __khwcap_feature(PMULL) +#define KERNEL_HWCAP_SHA1 __khwcap_feature(SHA1) +#define KERNEL_HWCAP_SHA2 __khwcap_feature(SHA2) +#define KERNEL_HWCAP_CRC32 __khwcap_feature(CRC32) +#define KERNEL_HWCAP_ATOMICS __khwcap_feature(ATOMICS) +#define KERNEL_HWCAP_FPHP __khwcap_feature(FPHP) +#define KERNEL_HWCAP_ASIMDHP __khwcap_feature(ASIMDHP) +#define KERNEL_HWCAP_CPUID __khwcap_feature(CPUID) +#define KERNEL_HWCAP_ASIMDRDM __khwcap_feature(ASIMDRDM) +#define KERNEL_HWCAP_JSCVT __khwcap_feature(JSCVT) +#define KERNEL_HWCAP_FCMA __khwcap_feature(FCMA) +#define KERNEL_HWCAP_LRCPC __khwcap_feature(LRCPC) +#define KERNEL_HWCAP_DCPOP __khwcap_feature(DCPOP) +#define KERNEL_HWCAP_SHA3 __khwcap_feature(SHA3) +#define KERNEL_HWCAP_SM3 __khwcap_feature(SM3) +#define KERNEL_HWCAP_SM4 __khwcap_feature(SM4) +#define KERNEL_HWCAP_ASIMDDP __khwcap_feature(ASIMDDP) +#define KERNEL_HWCAP_SHA512 __khwcap_feature(SHA512) +#define KERNEL_HWCAP_SVE __khwcap_feature(SVE) +#define KERNEL_HWCAP_ASIMDFHM __khwcap_feature(ASIMDFHM) +#define KERNEL_HWCAP_DIT __khwcap_feature(DIT) +#define KERNEL_HWCAP_USCAT __khwcap_feature(USCAT) +#define KERNEL_HWCAP_ILRCPC __khwcap_feature(ILRCPC) +#define KERNEL_HWCAP_FLAGM __khwcap_feature(FLAGM) +#define KERNEL_HWCAP_SSBS __khwcap_feature(SSBS) +#define KERNEL_HWCAP_SB __khwcap_feature(SB) +#define KERNEL_HWCAP_PACA __khwcap_feature(PACA) +#define KERNEL_HWCAP_PACG __khwcap_feature(PACG) + +#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) + /* * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. 
*/ -#define ELF_HWCAP (elf_hwcap) +#define ELF_HWCAP lower_32_bits(elf_hwcap) +#define ELF_HWCAP2 upper_32_bits(elf_hwcap) #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 5f0750c2199cc..453b45af80b77 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -18,7 +18,7 @@ #define _UAPI__ASM_HWCAP_H /* - * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + * HWCAP flags - for AT_HWCAP */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4061de10cea6c..986ceeacd19f2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1571,39 +1571,39 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), + 
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_PTR_AUTH - HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, HWCAP_PACA), - HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, HWCAP_PACG), + HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), + 
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif {}, }; @@ -1623,7 +1623,7 @@ static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: - elf_hwcap |= cap->hwcap; + cpu_set_feature(cap->hwcap); break; #ifdef CONFIG_COMPAT case CAP_COMPAT_HWCAP: @@ -1646,7 +1646,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) switch (cap->hwcap_type) { case CAP_HWCAP: - rc = (elf_hwcap & cap->hwcap) != 0; + rc = cpu_have_feature(cap->hwcap); break; #ifdef CONFIG_COMPAT case CAP_COMPAT_HWCAP: @@ -1667,7 +1667,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { /* We support emulation of accesses to CPU ID feature registers */ - elf_hwcap |= HWCAP_CPUID; + cpu_set_named_feature(CPUID); for (; hwcaps->matches; hwcaps++) if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index ca0685f339002..810db95f293ff 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -167,7 +167,7 @@ static int c_show(struct seq_file *m, void *v) #endif /* CONFIG_COMPAT */ } else { for (j = 0; hwcap_str[j]; j++) - if (elf_hwcap & (1 << j)) + if (cpu_have_feature(j)) seq_printf(m, " %s", hwcap_str[j]); } seq_puts(m, "\n"); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5ebe73b699618..735cf1f8b109c 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1258,14 +1258,14 @@ static inline void fpsimd_hotplug_init(void) { } */ static int __init fpsimd_init(void) { - if (elf_hwcap & HWCAP_FP) { + if (cpu_have_named_feature(FP)) { fpsimd_pm_init(); fpsimd_hotplug_init(); } else { pr_notice("Floating-point is not implemented\n"); } - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); return sve_sysctl_init(); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281cea..6cc8aff838058 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -833,7 +833,11 @@ static void arch_timer_evtstrm_enable(int divider) cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) | ARCH_TIMER_VIRT_EVT_EN; arch_timer_set_cntkctl(cntkctl); +#ifdef CONFIG_ARM64 + cpu_set_named_feature(EVTSTRM); +#else elf_hwcap |= HWCAP_EVTSTRM; +#endif #ifdef CONFIG_COMPAT compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; #endif @@ -1055,7 +1059,11 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self, } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); +#ifdef CONFIG_ARM64 + if (cpu_have_named_feature(EVTSTRM)) +#else if (elf_hwcap & HWCAP_EVTSTRM) +#endif cpumask_set_cpu(smp_processor_id(), &evtstrm_available); } return NOTIFY_OK; -- GitLab From aec0bff757c937489d003ab7b36c52e77e4b096a Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:41 +0100 Subject: [PATCH 1205/1861] arm64: HWCAP: encapsulate elf_hwcap The introduction of AT_HWCAP2 introduced accessors which ensure that hwcap features are set and tested appropriately. Let's now mandate access to elf_hwcap via these accessors by making elf_hwcap static within cpufeature.c. 
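The effect on code outside cpufeature.c can be condensed from the arch timer conversion above into a before/after sketch (illustrative only, using EVTSTRM as the example feature):

	/* Before: drivers poked the bitmap directly. */
	if (elf_hwcap & HWCAP_EVTSTRM)
		cpumask_set_cpu(smp_processor_id(), &evtstrm_available);

	/* After: elf_hwcap becomes private to cpufeature.c, so all access
	 * goes through the accessors, which also WARN on out-of-range
	 * feature numbers. */
	if (cpu_have_named_feature(EVTSTRM))
		cpumask_set_cpu(smp_processor_id(), &evtstrm_available);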
Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 15 ++++--------- arch/arm64/include/asm/hwcap.h | 7 +++--- arch/arm64/kernel/cpufeature.c | 33 +++++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 347c170466686..a3f028f82def9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -392,19 +392,12 @@ extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) bool this_cpu_has_cap(unsigned int cap); +void cpu_set_feature(unsigned int num); +bool cpu_have_feature(unsigned int num); +unsigned long cpu_get_elf_hwcap(void); +unsigned long cpu_get_elf_hwcap2(void); -static inline void cpu_set_feature(unsigned int num) -{ - WARN_ON(num >= MAX_CPU_FEATURES); - elf_hwcap |= BIT(num); -} #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) - -static inline bool cpu_have_feature(unsigned int num) -{ - WARN_ON(num >= MAX_CPU_FEATURES); - return elf_hwcap & BIT(num); -} #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) /* System capability check for constant caps */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index af868cbe96f83..a843b6efce5b4 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -17,6 +17,7 @@ #define __ASM_HWCAP_H #include +#include #define COMPAT_HWCAP_HALF (1 << 1) #define COMPAT_HWCAP_THUMB (1 << 2) @@ -40,7 +41,6 @@ #define COMPAT_HWCAP2_CRC32 (1 << 4) #ifndef __ASSEMBLY__ -#include #include /* @@ -93,8 +93,8 @@ * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. */ -#define ELF_HWCAP lower_32_bits(elf_hwcap) -#define ELF_HWCAP2 upper_32_bits(elf_hwcap) +#define ELF_HWCAP cpu_get_elf_hwcap() +#define ELF_HWCAP2 cpu_get_elf_hwcap2() #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) @@ -110,6 +110,5 @@ enum { #endif }; -extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 986ceeacd19f2..a655d1bb1186d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -35,8 +35,8 @@ #include #include -unsigned long elf_hwcap __read_mostly; -EXPORT_SYMBOL_GPL(elf_hwcap); +/* Kernel representation of AT_HWCAP and AT_HWCAP2 */ +static unsigned long elf_hwcap __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ @@ -1947,6 +1947,35 @@ bool this_cpu_has_cap(unsigned int n) return false; } +void cpu_set_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + elf_hwcap |= BIT(num); +} +EXPORT_SYMBOL_GPL(cpu_set_feature); + +bool cpu_have_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + return elf_hwcap & BIT(num); +} +EXPORT_SYMBOL_GPL(cpu_have_feature); + +unsigned long cpu_get_elf_hwcap(void) +{ + /* + * We currently only populate the first 32 bits of AT_HWCAP. Please + * note that for userspace compatibility we guarantee that bits 62 + * and 63 will always be returned as 0. 
+ */ + return lower_32_bits(elf_hwcap); +} + +unsigned long cpu_get_elf_hwcap2(void) +{ + return upper_32_bits(elf_hwcap); +} + static void __init setup_system_capabilities(void) { /* -- GitLab From d16ed4105f5bbd8a34f15053045a6657f4c52c6f Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:42 +0100 Subject: [PATCH 1206/1861] arm64: Handle trapped DC CVADP The ARMv8.5 DC CVADP instruction may be trapped to EL1 via SCTLR_EL1.UCI therefore let's provide a handler for it. Just like the CVAP instruction we use a 'sys' instruction instead of the 'dc' alias to avoid build issues with older toolchains. Signed-off-by: Andrew Murray Reviewed-by: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 3 ++- arch/arm64/kernel/traps.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 3541720189c95..0e27fe91d5ea8 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -196,9 +196,10 @@ /* * User space cache operations have the following sysreg encoding * in System instructions. - * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0) + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0) */ #define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 74598396e0bf4..21e73954762ca 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -459,6 +459,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ __user_cache_maint("dc civac", address, ret); break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVADP: /* DC CVADP */ + __user_cache_maint("sys 3, c7, c13, 1", address, ret); + break; case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */ __user_cache_maint("sys 3, c7, c12, 1", address, ret); break; -- GitLab From 671db581815faf17cbedd7fcbc48823a247d90b1 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:43 +0100 Subject: [PATCH 1207/1861] arm64: Expose DC CVADP to userspace ARMv8.5 builds upon the ARMv8.2 DC CVAP instruction by introducing a DC CVADP instruction which cleans the data cache to the point of deep persistence. Let's expose this support via the arm64 ELF hwcaps. Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.txt | 4 ++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 5 +++++ arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 12 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 11805777031e7..55431fd2a67a6 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -135,6 +135,10 @@ HWCAP_DCPOP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001. +HWCAP2_DCPODP + + Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. + HWCAP_SHA3 Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001. 
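Given the new HWCAP2_DCPODP entry above, userspace that finds the bit set could issue the instruction along the lines of the sketch below. It mirrors the kernel's approach of using the 'sys' encoding rather than the 'dc cvadp' alias so that pre-ARMv8.5 assemblers accept it; this is an illustration, not part of the patch.

/* DC CVADP, Xt = addr: clean one cache line to the point of deep
 * persistence. From EL0 this traps unless SCTLR_EL1.UCI is set; the
 * handler added earlier in the series emulates it in that case. */
static inline void dc_cvadp(void *addr)
{
	asm volatile("sys #3, c7, c13, #1, %0" : : "r" (addr) : "memory");
}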
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index a843b6efce5b4..f78c86c64e682 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -88,6 +88,7 @@ #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) #define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) +#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 453b45af80b77..d64af3913a9e0 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -53,4 +53,9 @@ #define HWCAP_PACA (1 << 30) #define HWCAP_PACG (1UL << 31) +/* + * HWCAP2 flags - for AT_HWCAP2 + */ +#define HWCAP2_DCPODP (1 << 0) + #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a655d1bb1186d..f8b682a3a9f4f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1591,6 +1591,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 810db95f293ff..093ca53ce1d1c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -85,6 +85,7 @@ static const char *const hwcap_str[] = { "sb", "paca", "pacg", + "dcpodp", NULL }; -- GitLab From 04a1438e5660ae44ecebd6c870fbcc140dd883a7 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:44 +0100 Subject: [PATCH 1208/1861] arm64: add CVADP support to the cache maintenance helper Allow users of dcache_by_line_op to specify cvadp as an op. Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6212121968713..039fbd822ec64 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -407,10 +407,14 @@ alternative_endif .ifc \op, cvap sys 3, c7, c12, 1, \kaddr // dc cvap .else + .ifc \op, cvadp + sys 3, c7, c13, 1, \kaddr // dc cvadp + .else dc \op, \kaddr .endif .endif .endif + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b -- GitLab From b9585f53bcf1ada1fbac22c15fd723c65ef67ff1 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:45 +0100 Subject: [PATCH 1209/1861] arm64: Advertise ARM64_HAS_DCPODP cpu feature Advertise ARM64_HAS_DCPODP when both DC CVAP and DC CVADP are supported. Even though we don't use this feature now, we provide it for consistency with DCPOP and anticipate it being used in the future. 
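Nothing in the tree consumes the capability yet; a future user would presumably gate on it like any other system-wide cpucap, for example (the deep-persistence helper name is an assumption, not an existing API):

	/* Prefer the deepest persistence point the system offers. */
	if (cpus_have_const_cap(ARM64_HAS_DCPODP))
		arch_wb_cache_pmem_deep(addr, size);	/* assumed helper */
	else if (cpus_have_const_cap(ARM64_HAS_DCPOP))
		arch_wb_cache_pmem(addr, size);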
Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f6a76e43f39ed..defdc67d9ab49 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -61,7 +61,8 @@ #define ARM64_HAS_GENERIC_AUTH_ARCH 40 #define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 #define ARM64_HAS_IRQ_PRIO_MASKING 42 +#define ARM64_HAS_DCPODP 43 -#define ARM64_NCAPS 43 +#define ARM64_NCAPS 44 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f8b682a3a9f4f..9d18e45311fd4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1340,6 +1340,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR1_DPB_SHIFT, .min_field_value = 1, }, + { + .desc = "Data cache clean to Point of Deep Persistence", + .capability = ARM64_HAS_DCPODP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .min_field_value = 2, + }, #endif #ifdef CONFIG_ARM64_SVE { -- GitLab
From eea1bb2248691ed65c33daff94a253af44feb103 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Tue, 16 Apr 2019 01:36:36 +0800 Subject: [PATCH 1210/1861] arm64: mm: check virtual addr in virt_to_page() if CONFIG_DEBUG_VIRTUAL=y This change uses the original virt_to_page() (the one with __pa()) to check the given virtual address if CONFIG_DEBUG_VIRTUAL=y. Recently, I worked on a bug: a driver passes a symbol address to dma_map_single() and the virt_to_page() (called by dma_map_single()) does not work for non-linear addresses after commit 9f2875912dac ("arm64: mm: restrict virt_to_page() to the linear mapping"). I tried to trap the bug by enabling CONFIG_DEBUG_VIRTUAL but it did not work - because the commit removes the __pa() from virt_to_page() but CONFIG_DEBUG_VIRTUAL checks the virtual address in __pa()/__virt_to_phys(). A simple solution is to use the original virt_to_page() (the one with __pa()) if CONFIG_DEBUG_VIRTUAL=y. Cc: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 290195168bb3c..2cb8248fa2c89 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -302,7 +302,7 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) -#ifndef CONFIG_SPARSEMEM_VMEMMAP +#if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else -- GitLab
From 131e135f7fd14b1de7a5eb26631076705c18073f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 9 Apr 2019 12:13:13 +0100 Subject: [PATCH 1211/1861] arm64: instrument smp_{load_acquire,store_release} Our __smp_store_release() and __smp_load_acquire() macros use inline assembly, which is opaque to kasan. This means that kasan can't catch erroneous use of these. This patch adds kasan instrumentation to both.
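To see what the added checks buy, consider a contrived use-after-free (do_something() is a placeholder) that was previously invisible to KASAN because the access was hidden inside the inline assembly:

	u32 *flag = kmalloc(sizeof(*flag), GFP_KERNEL);

	kfree(flag);
	/* The load below is buried in asm; with the kasan_check_read()
	 * added by this patch, KASAN now reports the use-after-free. */
	if (smp_load_acquire(flag))
		do_something();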
Cc: Catalin Marinas Signed-off-by: Mark Rutland [will: consistently use *p as argument to sizeof] Signed-off-by: Will Deacon --- arch/arm64/include/asm/barrier.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index f66bb04fdf2dd..85b6bedbcc680 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -20,6 +20,8 @@ #ifndef __ASSEMBLY__ +#include + #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) asm volatile(__nops(n)) @@ -72,31 +74,33 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, #define __smp_store_release(p, v) \ do { \ + typeof(p) __p = (p); \ union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ + { .__val = (__force typeof(*p)) (v) }; \ compiletime_assert_atomic_type(*p); \ + kasan_check_write(__p, sizeof(*p)); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u8 *)__u.__c) \ : "memory"); \ break; \ case 2: \ asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u16 *)__u.__c) \ : "memory"); \ break; \ case 4: \ asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u32 *)__u.__c) \ : "memory"); \ break; \ case 8: \ asm volatile ("stlr %1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u64 *)__u.__c) \ : "memory"); \ break; \ @@ -106,27 +110,29 @@ do { \ #define __smp_load_acquire(p) \ ({ \ union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ compiletime_assert_atomic_type(*p); \ + kasan_check_read(__p, sizeof(*p)); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("ldarb %w0, %1" \ : "=r" (*(__u8 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ case 2: \ asm volatile ("ldarh %w0, %1" \ : "=r" (*(__u16 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ case 4: \ asm volatile ("ldar %w0, %1" \ : "=r" (*(__u32 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ case 8: \ asm volatile ("ldar %0, %1" \ : "=r" (*(__u64 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ } \ __u.__val; \ -- GitLab From 22e6c8087e175bc5c507b4e45d1ca588b2bcc61c Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Tue, 16 Apr 2019 20:49:35 +0530 Subject: [PATCH 1212/1861] arm64: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in the arm64 Hardware Architecture related files. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Will Deacon --- arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h | 2 +- arch/arm64/include/asm/pointer_auth.h | 2 +- arch/arm64/include/asm/sdei.h | 2 +- arch/arm64/include/asm/vmap_stack.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h b/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h index 1b4cb0c55744a..385c455a7c985 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h +++ b/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2018 MediaTek Inc. 
* Author: Zhiyong Tao diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index 15d49515efdd7..d328540cb85ed 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_POINTER_AUTH_H #define __ASM_POINTER_AUTH_H diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index ffe47d766c259..63e0b92a5fbb0 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2017 Arm Ltd. #ifndef __ASM_SDEI_H #define __ASM_SDEI_H diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 0b5ec6e08c101..0a12115d96384 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2017 Arm Ltd. #ifndef __ASM_VMAP_STACK_H #define __ASM_VMAP_STACK_H -- GitLab
From 1e6db2ee86e6a4399fc0ae5689e55e0fd1c43caf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 15 Apr 2019 14:53:33 +0200 Subject: [PATCH 1213/1861] perf top: Always sample time to satisfy needs of use of ordered queuing Bastian reported a broken 'perf top -p PID' command; it won't display any data. The problem is that for the -p option we monitor a single thread, so we don't enable time in samples, because it's not needed. However, since commit 16c66bc167cc we use ordered queues to stash data, and later commits added logic for dropping samples when there's a big load and we don't keep up. All this needs a timestamp per sample. Enable it unconditionally for perf top. Reported-by: Bastian Beischer Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bastian beischer Fixes: 16c66bc167cc ("perf top: Add processing thread") Link: http://lkml.kernel.org/r/20190415125333.27160-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1999d6533d12a..fbbb0da43abba 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1377,6 +1377,7 @@ int cmd_top(int argc, const char **argv) * */ .overwrite = 0, .sample_time = true, + .sample_time_set = true, }, .max_stack = sysctl__max_stack(), .annotation_opts = annotation__default_options, -- GitLab
From 30e4c574969ccb624940f1f2f7886596f8fc60ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Apr 2019 11:30:15 -0300 Subject: [PATCH 1214/1861] tools include uapi: Sync sound/asound.h copy Picking the changes from: Fixes: b5bdbb6ccd11 ("ALSA: uapi: #include in asound.h") Which entails no changes in the tooling side.
To silence this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/sound/asound.h' differs from latest version at 'include/uapi/sound/asound.h' diff -u tools/include/uapi/sound/asound.h include/uapi/sound/asound.h Cc: Adrian Hunter Cc: Daniel Mentz Cc: Jiri Olsa Cc: Namhyung Kim Cc: Takashi Iwai Link: https://lkml.kernel.org/n/tip-15o4twfkbn6nny9aus90dyzx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/sound/asound.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 404d4b9ffe764..df1153cea0b7e 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -32,6 +32,7 @@ #ifndef __KERNEL__ #include +#include #endif /* -- GitLab From 81fb8736dd81da3fe94f28968dac60f392ec6746 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 16 Apr 2019 17:14:30 +0100 Subject: [PATCH 1215/1861] arm64: vdso: Fix clock_getres() for CLOCK_REALTIME clock_getres() in the vDSO library has to preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; where 'hrtimer_resolution' depends on whether or not high resolution timers are enabled, which is a runtime decision. The vDSO incorrectly returns the constant CLOCK_REALTIME_RES. Fix this by exposing 'hrtimer_resolution' in the vDSO datapage and returning that instead. Reviewed-by: Catalin Marinas Signed-off-by: Vincenzo Frascino [will: Use WRITE_ONCE(), move adr off COARSE path, renumber labels, use 'w' reg] Signed-off-by: Will Deacon --- arch/arm64/include/asm/vdso_datapage.h | 1 + arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/vdso.c | 3 +++ arch/arm64/kernel/vdso/gettimeofday.S | 7 +++---- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index 2b9a63771eda8..f89263c8e11af 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -38,6 +38,7 @@ struct vdso_data { __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; + __u32 hrtimer_res; }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7f40dcbdd51d0..e10e2a5d9ddcf 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9fb0c0c95a85c..42b7082029e1d 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -230,6 +230,9 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; + /* Read without the seqlock held by clock_getres() */ + WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); + if (!use_syscall) { /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S 
index c39872a7b03c3..21805e4164831 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -301,13 +301,14 @@ ENTRY(__kernel_clock_getres) ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f - ldr x2, 5f + adr vdso_data, _vdso_data + ldr w2, [vdso_data, #CLOCK_REALTIME_RES] b 2f 1: cmp w0, #CLOCK_REALTIME_COARSE ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 4f - ldr x2, 6f + ldr x2, 5f 2: cbz x1, 3f stp xzr, x2, [x1] @@ -321,8 +322,6 @@ ENTRY(__kernel_clock_getres) svc #0 ret 5: - .quad CLOCK_REALTIME_RES -6: .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) -- GitLab From 36a2ba07757df790b4a874efb1a105b9330a9ae7 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 8 Apr 2019 23:21:12 +0800 Subject: [PATCH 1216/1861] ACPI/IORT: Reject platform device creation on NUMA node mapping failure In a system where, through IORT firmware mappings, the SMMU device is mapped to a NUMA node that is not online, the kernel bootstrap results in the following crash: Unable to handle kernel paging request at virtual address 0000000000001388 Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 [0000000000001388] user address but active_mm is swapper Internal error: Oops: 96000004 [#1] SMP Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 5.0.0 #15 pstate: 80c00009 (Nzcv daif +PAN +UAO) pc : __alloc_pages_nodemask+0x13c/0x1068 lr : __alloc_pages_nodemask+0xdc/0x1068 ... Process swapper/0 (pid: 1, stack limit = 0x(____ptrval____)) Call trace: __alloc_pages_nodemask+0x13c/0x1068 new_slab+0xec/0x570 ___slab_alloc+0x3e0/0x4f8 __slab_alloc+0x60/0x80 __kmalloc_node_track_caller+0x10c/0x478 devm_kmalloc+0x44/0xb0 pinctrl_bind_pins+0x4c/0x188 really_probe+0x78/0x2b8 driver_probe_device+0x64/0x110 device_driver_attach+0x74/0x98 __driver_attach+0x9c/0xe8 bus_for_each_dev+0x84/0xd8 driver_attach+0x30/0x40 bus_add_driver+0x170/0x218 driver_register+0x64/0x118 __platform_driver_register+0x54/0x60 arm_smmu_driver_init+0x24/0x2c do_one_initcall+0xbc/0x328 kernel_init_freeable+0x304/0x3ac kernel_init+0x18/0x110 ret_from_fork+0x10/0x1c Code: f90013b5 b9410fa1 1a9f0694 b50014c2 (b9400804) ---[ end trace dfeaed4c373a32da ]-- Change the dev_set_proximity() hook prototype so that it returns a value and make it return failure if the PXM->NUMA-node mapping corresponds to an offline node, fixing the crash. 
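The guard that implements this is condensed below; the same pattern applies to any firmware-supplied proximity domain (a sketch of the logic in the diff that follows):

	int node = acpi_map_pxm_to_node(smmu->pxm);

	/* Refuse offline nodes: letting one reach set_dev_node() means
	 * later allocations dereference offline NUMA state and crash. */
	if (node != NUMA_NO_NODE && !node_online(node))
		return -EINVAL;
	set_dev_node(dev, node);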
Acked-by: Lorenzo Pieralisi Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/linux-arm-kernel/20190315021940.86905-1-wangkefeng.wang@huawei.com/ Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e48894e002ba8..a46c2c162c03e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1232,18 +1232,24 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) /* * set numa proximity domain for smmuv3 device */ -static void __init arm_smmu_v3_set_proximity(struct device *dev, +static int __init arm_smmu_v3_set_proximity(struct device *dev, struct acpi_iort_node *node) { struct acpi_iort_smmu_v3 *smmu; smmu = (struct acpi_iort_smmu_v3 *)node->node_data; if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) { - set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm)); + int node = acpi_map_pxm_to_node(smmu->pxm); + + if (node != NUMA_NO_NODE && !node_online(node)) + return -EINVAL; + + set_dev_node(dev, node); pr_info("SMMU-v3[%llx] Mapped to Proximity domain %d\n", smmu->base_address, smmu->pxm); } + return 0; } #else #define arm_smmu_v3_set_proximity NULL @@ -1318,7 +1324,7 @@ struct iort_dev_config { int (*dev_count_resources)(struct acpi_iort_node *node); void (*dev_init_resources)(struct resource *res, struct acpi_iort_node *node); - void (*dev_set_proximity)(struct device *dev, + int (*dev_set_proximity)(struct device *dev, struct acpi_iort_node *node); }; @@ -1369,8 +1375,11 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, if (!pdev) return -ENOMEM; - if (ops->dev_set_proximity) - ops->dev_set_proximity(&pdev->dev, node); + if (ops->dev_set_proximity) { + ret = ops->dev_set_proximity(&pdev->dev, node); + if (ret) + goto dev_put; + } count = ops->dev_count_resources(node); -- GitLab From 2d65c42b43e53d61f1fd6b8d0a097451a4cffa24 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 10 Apr 2019 12:09:14 -0500 Subject: [PATCH 1217/1861] genirq/devres: Use struct_size() in devm_kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = devm_kzalloc(dev, size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper. instance = devm_kzalloc(dev, struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190410170914.GA16161@embeddedor --- kernel/irq/devres.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index f808c6a97dccc..f6e5515ee0774 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -220,9 +220,8 @@ devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, irq_flow_handler_t handler) { struct irq_chip_generic *gc; - unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); - gc = devm_kzalloc(dev, sz, GFP_KERNEL); + gc = devm_kzalloc(dev, struct_size(gc, chip_types, num_ct), GFP_KERNEL); if (gc) irq_init_generic_chip(gc, name, num_ct, irq_base, reg_base, handler); -- GitLab
From 4302e381a870aafb547e6139830e5a4ee2cb8261 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 15 Apr 2019 12:47:46 +0100 Subject: [PATCH 1218/1861] firmware/psci: add support for SYSTEM_RESET2 PSCI v1.1 introduced SYSTEM_RESET2 to allow both architectural resets, where the semantics are described by the PSCI specification itself, as well as vendor-specific resets. Currently, only system warm reset semantics are defined as part of the architectural resets by the specification. This patch implements support for SYSTEM_RESET2 by making use of the reboot_mode passed by the reboot infrastructure in the kernel. Acked-by: Mark Rutland Tested-by: Aaro Koskinen Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 24 +++++++++++++++++++++++- include/uapi/linux/psci.h | 2 ++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index eabd01383cd6e..fe090ef43d286 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -88,6 +88,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; PSCI_1_0_EXT_POWER_STATE_TYPE_MASK) static u32 psci_cpu_suspend_feature; +static bool psci_system_reset2_supported; static inline bool psci_has_ext_power_state(void) { @@ -258,7 +259,17 @@ static int get_set_conduit_method(struct device_node *np) static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) { - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + if ((reboot_mode == REBOOT_WARM || reboot_mode == REBOOT_SOFT) && + psci_system_reset2_supported) { + /* + * reset_type[31] = 0 (architectural) + * reset_type[30:0] = 0 (SYSTEM_WARM_RESET) + * cookie = 0 (ignored by the implementation) + */ + invoke_psci_fn(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2), 0, 0, 0); + } else { + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + } } static void psci_sys_poweroff(void) @@ -460,6 +471,16 @@ static const struct platform_suspend_ops psci_suspend_ops = { .enter = psci_system_suspend_enter, }; +static void __init psci_init_system_reset2(void) +{ + int ret; + + ret = psci_features(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2)); + + if (ret != PSCI_RET_NOT_SUPPORTED) + psci_system_reset2_supported = true; +} + static void __init psci_init_system_suspend(void) { int ret; @@ -597,6 +618,7 @@ static int __init psci_probe(void) psci_init_smccc(); psci_init_cpu_suspend(); psci_init_system_suspend(); + psci_init_system_reset2(); } return 0; diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 581f72085c334..2fcad1dd0b0e0 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -50,8 +50,10 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) #define PSCI_1_0_FN_SET_SUSPEND_MODE
PSCI_0_2_FN(15) +#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -- GitLab From eae2ef0ed2460c56915b13e7d67bb46ae379ec72 Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Mon, 15 Apr 2019 14:03:58 +0300 Subject: [PATCH 1219/1861] drivers/cpufreq/acpi-cpufreq.c: This fixes the following checkpatch warning WARNING: Prefer using '"%s...", __func__' to using function's name, in a string Switch hardcoded function name with a reference to __func__ making the code more maintainable Signed-off-by: Mohan Kumar Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index c72258a44ba4d..73bb2aafb1a8f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -366,7 +366,7 @@ static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *dat val = drv_read(data, mask); - pr_debug("get_cur_val = %u\n", val); + pr_debug("%s = %u\n", __func__, val); return val; } @@ -378,7 +378,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) unsigned int freq; unsigned int cached_freq; - pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); + pr_debug("%s (%d)\n", __func__, cpu); policy = cpufreq_cpu_get_raw(cpu); if (unlikely(!policy)) @@ -458,8 +458,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, if (acpi_pstate_strict) { if (!check_freqs(policy, mask, policy->freq_table[index].frequency)) { - pr_debug("acpi_cpufreq_target failed (%d)\n", - policy->cpu); + pr_debug("%s (%d)\n", __func__, policy->cpu); result = -EAGAIN; } } @@ -573,7 +572,7 @@ static int cpufreq_boost_down_prep(unsigned int cpu) static int __init acpi_cpufreq_early_init(void) { unsigned int i; - pr_debug("acpi_cpufreq_early_init\n"); + pr_debug("%s\n", __func__); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); if (!acpi_perf_data) { @@ -657,7 +656,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) static int blacklisted; #endif - pr_debug("acpi_cpufreq_cpu_init\n"); + pr_debug("%s\n", __func__); #ifdef CONFIG_SMP if (blacklisted) @@ -856,7 +855,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; - pr_debug("acpi_cpufreq_cpu_exit\n"); + pr_debug("%s\n", __func__); policy->fast_switch_possible = false; policy->driver_data = NULL; @@ -881,7 +880,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { struct acpi_cpufreq_data *data = policy->driver_data; - pr_debug("acpi_cpufreq_resume\n"); + pr_debug("%s\n", __func__); data->resume = 1; @@ -954,7 +953,7 @@ static int __init acpi_cpufreq_init(void) if (cpufreq_get_current_driver()) return -EEXIST; - pr_debug("acpi_cpufreq_init\n"); + pr_debug("%s\n", __func__); ret = acpi_cpufreq_early_init(); if (ret) @@ -991,7 +990,7 @@ static int __init acpi_cpufreq_init(void) static void __exit acpi_cpufreq_exit(void) { - pr_debug("acpi_cpufreq_exit\n"); + pr_debug("%s\n", __func__); acpi_cpufreq_boost_exit(); -- GitLab From b23aa311fa1f376640069e5da897b4a6148d104f Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Tue, 16 Apr 2019 10:40:27 +0800 Subject: [PATCH 1220/1861] cpufreq: Remove needless bios_limit check in show_bios_limit() Initially, 
the bios_limit attribute is created if driver->bios_limit is set in cpufreq_add_dev_interface(), so remove the redundant check from the later show operation. Signed-off-by: Yue Hu Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3f235d5f67ee8..df7d112fe6218 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -894,11 +894,9 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) { unsigned int limit; int ret; - if (cpufreq_driver->bios_limit) { - ret = cpufreq_driver->bios_limit(policy->cpu, &limit); - if (!ret) - return sprintf(buf, "%u\n", limit); - } + ret = cpufreq_driver->bios_limit(policy->cpu, &limit); + if (!ret) + return sprintf(buf, "%u\n", limit); return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); } -- GitLab
From bd2e98b351b668fa914b46cc77040fdb2a817c06 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 16 Apr 2019 16:26:45 -0500 Subject: [PATCH 1221/1861] ipmi: Fix failure on SMBIOS specified devices An extra memset was put into a place that cleared the interface type. Reported-by: Tony Camuso Fixes: 3cd83bac481dc4 ("ipmi: Consolidate the adding of platform devices") Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_dmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index ff0b199be4729..f2411468f33ff 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -66,7 +66,6 @@ static void __init dmi_add_platform_ipmi(unsigned long base_addr, return; } - memset(&p, 0, sizeof(p)); p.addr = base_addr; p.space = space; p.regspacing = offset; -- GitLab
From a885bcfd152f97b25005298ab2d6b741aed9b49c Mon Sep 17 00:00:00 2001 From: Tony Camuso Date: Tue, 9 Apr 2019 15:20:03 -0400 Subject: [PATCH 1222/1861] ipmi: ipmi_si_hardcode.c: init si_type array to fix a crash The intended behavior of function ipmi_hardcode_init_one() is to default to the kcs interface when no type argument is given when initializing ipmi with hard coded addresses. However, the array of char pointers allocated on the stack by function ipmi_hardcode_init() was not initialized to zeroes, so it contained stack debris. Consequently, passing the cruft stored in this array to function ipmi_hardcode_init_one() caused a crash when it was unable to detect that the char * being passed was nonsense and tried to access the address specified by the bogus pointer. The fix is simply to initialize the si_type array to zeroes, so that if no type argument is given on the command line, ipmi_hardcode_init_one() properly defaults to the kcs interface. Signed-off-by: Tony Camuso Message-Id: <1554837603-40299-1-git-send-email-tcamuso@redhat.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_hardcode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_hardcode.c b/drivers/char/ipmi/ipmi_si_hardcode.c index 01946cad3d138..682221eebd661 100644 --- a/drivers/char/ipmi/ipmi_si_hardcode.c +++ b/drivers/char/ipmi/ipmi_si_hardcode.c @@ -118,6 +118,8 @@ void __init ipmi_hardcode_init(void) char *str; char *si_type[SI_MAX_PARMS]; + memset(si_type, 0, sizeof(si_type)); + /* Parse out the si_type string into its components.
*/ str = si_type_str; if (*str != '\0') { -- GitLab From a1e8783db8e0d58891681bc1e6d9ada66eae8e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 12 Apr 2019 23:08:32 +0200 Subject: [PATCH 1223/1861] MIPS: perf: ath79: Fix perfcount IRQ assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently it's not possible to use perf on ath79 due to genirq flags mismatch happening on static virtual IRQ 13 which is used for performance counters hardware IRQ 5. On TP-Link Archer C7v5: CPU0 2: 0 MIPS 2 ath9k 4: 318 MIPS 4 19000000.eth 7: 55034 MIPS 7 timer 8: 1236 MISC 3 ttyS0 12: 0 INTC 1 ehci_hcd:usb1 13: 0 gpio-ath79 2 keys 14: 0 gpio-ath79 5 keys 15: 31 AR724X PCI 1 ath10k_pci $ perf top genirq: Flags mismatch irq 13. 00014c83 (mips_perf_pmu) vs. 00002003 (keys) On TP-Link Archer C7v4: CPU0 4: 0 MIPS 4 19000000.eth 5: 7135 MIPS 5 1a000000.eth 7: 98379 MIPS 7 timer 8: 30 MISC 3 ttyS0 12: 90028 INTC 0 ath9k 13: 5520 INTC 1 ehci_hcd:usb1 14: 4623 INTC 2 ehci_hcd:usb2 15: 32844 AR724X PCI 1 ath10k_pci 16: 0 gpio-ath79 16 keys 23: 0 gpio-ath79 23 keys $ perf top genirq: Flags mismatch irq 13. 00014c80 (mips_perf_pmu) vs. 00000080 (ehci_hcd:usb1) This problem is happening, because currently statically assigned virtual IRQ 13 for performance counters is not claimed during the initialization of MIPS PMU during the bootup, so the IRQ subsystem doesn't know, that this interrupt isn't available for further use. So this patch fixes the issue by simply booking hardware IRQ 5 for MIPS PMU. Tested-by: Kevin 'ldir' Darbyshire-Bryant Signed-off-by: Petr Štetiar Acked-by: John Crispin Acked-by: Marc Zyngier Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Thomas Gleixner Cc: Jason Cooper --- arch/mips/ath79/setup.c | 6 ------ drivers/irqchip/irq-ath79-misc.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4a70c5de8c929..25a57895a3a35 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -210,12 +210,6 @@ const char *get_system_type(void) return ath79_sys_type; } -int get_c0_perfcount_int(void) -{ - return ATH79_MISC_IRQ(5); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index aa72907846360..0390603170b40 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -22,6 +22,15 @@ #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 #define ATH79_MISC_IRQ_COUNT 32 +#define ATH79_MISC_PERF_IRQ 5 + +static int ath79_perfcount_irq; + +int get_c0_perfcount_int(void) +{ + return ath79_perfcount_irq; +} +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); static void ath79_misc_irq_handler(struct irq_desc *desc) { @@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init( { void __iomem *base = domain->host_data; + ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ); + /* Disable and clear all interrupts */ __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); -- GitLab From 3b2e2904deb314cc77a2192f506f2fd44e3d10d0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 11 Apr 2019 13:56:39 +0300 Subject: [PATCH 1224/1861] net: bridge: fix per-port af_packet sockets When the commit below was introduced it changed two visible things: - the skb was no 
longer passed through the protocol handlers with the original device - the skb was passed up the stack with skb->dev = bridge The first change broke af_packet sockets on bridge ports. For example we use them for hostapd which listens for ETH_P_PAE packets on the ports. We discussed two possible fixes: - create a clone and pass it through NF_HOOK(), act on the original skb based on the result - somehow signal to the caller from the okfn() that it was called, meaning the skb is ok to be passed, which this patch is trying to implement via returning 1 from the bridge link-local okfn() Note that we rely on the fact that NF_QUEUE/STOLEN would return 0 and drop/error would return < 0 thus the okfn() is called only when the return was 1, so we signal to the caller that it was called by preserving the return value from nf_hook(). Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_input.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5ea7e56119c13..ba303ee99b9b5 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: -- GitLab From ad910c7c01269f229a97c335f2dc669fff750f65 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Apr 2019 19:14:45 +0200 Subject: [PATCH 1225/1861] net/core: work around section mismatch warning for ptp_classifier The routine ptp_classifier_init() uses an initializer for an automatic struct type variable which refers to an __initdata symbol. This is perfectly legal, but may trigger a section mismatch warning when running the compiler in -fpic mode, because the initializer may be emitted into an anonymous .data section that lacks the __init annotation. So work around it by using assignments instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Gerald Schaefer Signed-off-by: David S.
Miller --- net/core/ptp_classifier.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 703cf76aa7c2d..7109c168b5e0f 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -185,9 +185,10 @@ void __init ptp_classifier_init(void) { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; - struct sock_fprog_kern ptp_prog = { - .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, - }; + struct sock_fprog_kern ptp_prog; + + ptp_prog.len = ARRAY_SIZE(ptp_filter); + ptp_prog.filter = ptp_filter; BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } -- GitLab From 899537b73557aafbdd11050b501cf54b4f5c45af Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 Apr 2019 15:57:23 -0500 Subject: [PATCH 1226/1861] net: atm: Fix potential Spectre v1 vulnerabilities arg is controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/atm/lec.c:715 lec_mcast_attach() warn: potential spectre issue 'dev_lec' [r] (local cap) Fix this by sanitizing arg before using it to index dev_lec. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/atm/lec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index d7f5cf5b7594d..ad4f829193f05 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; -- GitLab From d85e8be2a5a02869f815dd0ac2d743deb4cd7957 Mon Sep 17 00:00:00 2001 From: Yuya Kusakabe Date: Tue, 16 Apr 2019 10:22:28 +0900 Subject: [PATCH 1227/1861] net: Fix missing meta data in skb with vlan packet skb_reorder_vlan_header() should move XDP meta data with ethernet header if XDP meta data exists. Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Yuya Kusakabe Signed-off-by: Takeru Hayasaka Co-developed-by: Takeru Hayasaka Reviewed-by: Toshiaki Makita Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef2cd57120989..40796b8bf8204 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { - int mac_len; + int mac_len, meta_len; + void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); @@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } + + meta_len = skb_metadata_len(skb); + if (meta_len) { + meta = skb_metadata_end(skb) - meta_len; + memmove(meta + VLAN_HLEN, meta, meta_len); + } + skb->mac_header += VLAN_HLEN; return skb; } -- GitLab From f7a937801b9f8788519a23b12cb4d6c2c84d84be Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Tue, 16 Apr 2019 10:48:07 +0700 Subject: [PATCH 1228/1861] tipc: fix link established but not in session According to the link FSM, when a link endpoint got RESET_MSG (a traditional one without the stopping bit) from its peer, it moves to PEER_RESET state and raises a LINK_DOWN event which then resets the link itself. Its state will become ESTABLISHING after the reset event and the link will be re-established soon after this endpoint starts to send ACTIVATE_MSG to the peer. There is no problem with this mechanism; however, the link resetting has cleared the link 'in_session' flag (along with the other important link data such as the link 'mtu') that was correctly set up at the 1st step (i.e. when this endpoint received the peer RESET_MSG). As a result, the link will become ESTABLISHED, but the 'in_session' flag is not set, and all STATE_MSG from its peer will be dropped at the link_validate_msg(). It means the link is not synced and will sooner or later face a failure. Since the link reset action is obviously needed for a new link session (this is also true in the other situations), the problem here is that the link is re-established a bit too early when the link endpoints are not really in sync yet. The commit forces a resync as already done in the previous commit 91986ee166cf ("tipc: fix link session and re-establish issues") by simply varying the link 'peer_session' value at the link_reset(). Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 341ecd796aa47..131aa2f0fd27c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -869,6 +869,8 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_head_init(&list); l->in_session = false; + /* Force re-synch of peer session number before establishing */ + l->peer_session--; l->session++; l->mtu = l->advertised_mtu; -- GitLab From 4bcd4ec1017205644a2697bccbc3b5143f522f5f Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 16 Apr 2019 13:10:09 +0800 Subject: [PATCH 1229/1861] tipc: set sysctl_tipc_rmem and named_timeout right range We find that sysctl_tipc_rmem and named_timeout do not have the right minimum setting. sysctl_tipc_rmem should be larger than zero, like sysctl_tcp_rmem. And named_timeout, as a timeout setting, should not be less than zero.
Fixes: cc79dd1ba9c10 ("tipc: change socket buffer overflow control to respect sk_rcvbuf") Fixes: a5325ae5b8bff ("tipc: add name distributor resiliency queue") Signed-off-by: Jie Liu Reported-by: Qiang Ning Reviewed-by: Zhiqiang Liu Reviewed-by: Miaohe Lin Signed-off-by: David S. Miller --- net/tipc/sysctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 3481e4906bd6a..9df82a573aa77 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -38,6 +38,8 @@ #include +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -46,14 +48,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sk_filter", -- GitLab From 3321b6c23fb330e690ed5cf1336c827e07843200 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Apr 2019 12:43:17 +0100 Subject: [PATCH 1230/1861] qed: fix spelling mistake "faspath" -> "fastpath" There is a spelling mistake in a DP_INFO message, fix it. Signed-off-by: Colin Ian King Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 9faaa6df78ed9..2f318aaf2b05d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1591,7 +1591,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN; } else { DP_INFO(p_hwfn, - "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", + "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n", vf->abs_vf_id, req->vfdev_info.eth_fp_hsi_major, req->vfdev_info.eth_fp_hsi_minor, -- GitLab From 600bea7dba1a72874ae0cd9bc66bf2abfe43b49d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 16 Apr 2019 16:15:56 +0300 Subject: [PATCH 1231/1861] net: bridge: fix netlink export of vlan_stats_per_port option Since the introduction of the vlan_stats_per_port option the netlink export of it has been broken since I made a typo and used the ifla attribute instead of the bridge option to retrieve its state. Sysfs export is fine, only netlink export has been affected. Fixes: 9163a0fc1f0c0 ("net: bridge: add support for per-port vlan stats") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9c07591b0232e..7104cf13da840 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) || nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT, - br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT))) + br_opt_get(br, BROPT_VLAN_STATS_PER_PORT))) return -EMSGSIZE; #endif #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -- GitLab From a8fd48b50deaa20808bbf0f6685f6f1acba6a64c Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:58 +0300 Subject: [PATCH 1232/1861] ocelot: Don't sleep in atomic context (irqs_disabled()) Preemption disabled at: [] dev_set_rx_mode+0x1c/0x38 Call trace: [] dump_backtrace+0x0/0x3d0 [] show_stack+0x14/0x20 [] dump_stack+0xac/0xe4 [] ___might_sleep+0x164/0x238 [] __might_sleep+0x50/0x88 [] kmem_cache_alloc+0x17c/0x1d0 [] ocelot_set_rx_mode+0x108/0x188 [mscc_ocelot_common] [] __dev_set_rx_mode+0x58/0xa0 [] dev_set_rx_mode+0x24/0x38 Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index a1d0d6e425332..6cb2f03b67e66 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -613,7 +613,7 @@ static int ocelot_mact_mc_add(struct ocelot_port *port, struct netdev_hw_addr *hw_addr) { struct ocelot *ocelot = port->ocelot; - struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL); + struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC); if (!ha) return -ENOMEM; -- GitLab From 1e1caa9735f90b5452fc48685c23552e56aa1920 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 16 Apr 2019 17:51:59 +0300 Subject: [PATCH 1233/1861] ocelot: Clean up stats update deferred work This is preventive cleanup that may save trouble later. No need to cancel repeatedly queued work if the code is properly refactored. Don't let the ethtool -s process interfere with the stat workqueue scheduling. Signed-off-by: Claudiu Manoil Signed-off-by: David S.
Miller --- drivers/net/ethernet/mscc/ocelot.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 6cb2f03b67e66..d715ef4fc92fd 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -959,10 +959,8 @@ static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data) ETH_GSTRING_LEN); } -static void ocelot_check_stats(struct work_struct *work) +static void ocelot_update_stats(struct ocelot *ocelot) { - struct delayed_work *del_work = to_delayed_work(work); - struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work); int i, j; mutex_lock(&ocelot->stats_lock); @@ -986,11 +984,19 @@ static void ocelot_check_stats(struct work_struct *work) } } - cancel_delayed_work(&ocelot->stats_work); + mutex_unlock(&ocelot->stats_lock); +} + +static void ocelot_check_stats_work(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct ocelot *ocelot = container_of(del_work, struct ocelot, + stats_work); + + ocelot_update_stats(ocelot); + queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); - - mutex_unlock(&ocelot->stats_lock); } static void ocelot_get_ethtool_stats(struct net_device *dev, @@ -1001,7 +1007,7 @@ static void ocelot_get_ethtool_stats(struct net_device *dev, int i; /* check and update now */ - ocelot_check_stats(&ocelot->stats_work.work); + ocelot_update_stats(ocelot); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) @@ -1809,7 +1815,7 @@ int ocelot_init(struct ocelot *ocelot) ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6), ANA_CPUQ_8021_CFG, i); - INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats); + INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); return 0; -- GitLab From 50ce163a72d817a99e8974222dcf2886d5deb1ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2019 10:55:20 -0700 Subject: [PATCH 1234/1861] tcp: tcp_grow_window() needs to respect tcp_space() For some reason, tcp_grow_window() correctly tests if enough room is present before attempting to increase tp->rcv_ssthresh, but does not prevent it from growing past tcp_space(). This is causing hard-to-debug issues, like failing the (__tcp_select_window(sk) >= tp->rcv_wnd) test in __tcp_ack_snd_check(), causing ACK delays and possibly slow flows. Depending on tcp_rmem[2], MTU, skb->len/skb->truesize ratio, we can see the problem happening on "netperf -t TCP_RR -- -r 2000,2000" after about 60 round trips, when the active side no longer sends immediate acks. This bug predates git history. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Wei Wang Signed-off-by: David S.
Miller --- net/ipv4/tcp_input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5def3c48870e1..731d3045b50a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + int room; + + room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; /* Check #1 */ - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_under_memory_pressure(sk)) { + if (room > 0 && !tcp_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) if (incr) { incr = max_t(int, incr, 2 * skb->len); - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, - tp->window_clamp); + tp->rcv_ssthresh += min(room, incr); inet_csk(sk)->icsk_ack.quick |= 1; } } -- GitLab From e6986423d28362aafe64d3757bbbc493f2687f8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Apr 2019 22:31:14 +0200 Subject: [PATCH 1235/1861] socket: fix compat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW It looks like the new socket options only work correctly for native execution, but in case of compat mode fall back to the old behavior as we ignore the 'old_timeval' flag. Rework so we treat SO_RCVTIMEO_NEW/SO_SNDTIMEO_NEW the same way in compat and native 32-bit mode. Cc: Deepa Dinamani Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW") Signed-off-by: Arnd Bergmann Acked-by: Deepa Dinamani Signed-off-by: David S. Miller --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 782343bb925b6..067878a1e4c51 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -348,7 +348,7 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; } - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; *(struct old_timeval32 *)optval = tv32; return sizeof(tv32); @@ -372,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool { struct __kernel_sock_timeval tv; - if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { struct old_timeval32 tv32; if (optlen < sizeof(tv32)) -- GitLab From 83f8bf4b837b0e3417f0e5c717a43fcf71ecc992 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 16 Apr 2019 14:43:26 +0200 Subject: [PATCH 1236/1861] drm/tegra: hdmi: Setup audio only if configured The audio configuration is only valid if the HDMI codec has been properly set up. Do not attempt to set up audio before that happens because it causes a division by zero. Note that this is only problematic on Tegra20 and Tegra30. Later chips implement the division instructions which return zero when dividing by zero and don't throw an exception. 
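For illustration, a schematic of the failure mode; everything except hdmi->format.sample_rate is a made-up name here, and the real arithmetic lives inside tegra_hdmi_setup_audio():

	/* Before the HDMI codec has configured a stream, the audio format is all zeroes. */
	unsigned int rate = hdmi->format.sample_rate;	/* 0 until audio is set up */
	unsigned int aval = pclk / rate;		/* Tegra20/30: no divide instruction, trap;
							 * later chips: sdiv simply yields 0 */

Hence the fix below calls tegra_hdmi_setup_audio() only once a non-zero sample rate has been configured.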
Fixes: db5adf4d6dce ("drm/tegra: hdmi: Fix audio to work with any pixel clock rate") Reported-by: Marcel Ziswiler Tested-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hdmi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 47c55974756d5..d23c4bfde790c 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1260,9 +1260,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) hdmi->dvi = !tegra_output_is_hdmi(output); if (!hdmi->dvi) { - err = tegra_hdmi_setup_audio(hdmi); - if (err < 0) - hdmi->dvi = true; + /* + * Make sure that the audio format has been configured before + * enabling audio, otherwise we may try to divide by zero. + */ + if (hdmi->format.sample_rate > 0) { + err = tegra_hdmi_setup_audio(hdmi); + if (err < 0) + hdmi->dvi = true; + } } if (hdmi->config->has_hda) -- GitLab From 5f21f3055a32878c0c9d9ebbbafef72d60a1ff49 Mon Sep 17 00:00:00 2001 From: Shunyong Yang Date: Mon, 1 Apr 2019 16:26:35 +0800 Subject: [PATCH 1237/1861] ACPI: property: restore _DSD data subnodes GUID comment Commit 5f5e4890d57a ("ACPI / property: Allow multiple property compatible _DSD entries") removed the comment of _DSD data subnodes GUID. Restore it. Fixes: 5f5e4890d57a ("ACPI / property: Allow multiple property compatible _DSD entries") Signed-off-by: Shunyong Yang Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 77abe0ec40431..8832d0e13a72d 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -44,6 +44,7 @@ static const guid_t prp_guids[] = { 0xbf, 0xf0, 0x76, 0x14, 0x38, 0x07, 0xc3, 0x89), }; +/* ACPI _DSD data subnodes GUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */ static const guid_t ads_guid = GUID_INIT(0xdbb8e3e6, 0x5886, 0x4ba6, 0x87, 0x95, 0x13, 0x19, 0xf5, 0x2a, 0x96, 0x6b); -- GitLab From 35af0d469c6694c05f06e75c5d75caee9be66122 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 15 Apr 2019 12:41:08 +0200 Subject: [PATCH 1238/1861] s390: correct some inline assembly constraints Inline assembly code changed in this patch should really use "Q" constraint "Memory reference without index register and with short displacement". The kernel build with kasan instrumentation enabled might occasionally break otherwise (due to stack instrumentation). 
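To see why the constraint matters, a contrast of the two (a sketch based on the constraint definitions quoted above, not code from the patch): "m" lets the compiler form an indexed address D(X,B), which S-format instructions such as STPT cannot encode, while "Q" restricts the operand to base plus short displacement D(B):

	u64 timer;

	asm volatile("stpt %0" : "=m" (timer));	/* may expand to: stpt 8(%r2,%r15) - not encodable */
	asm volatile("stpt %0" : "=Q" (timer));	/* expands to e.g.: stpt 160(%r15) - valid */

With kasan stack instrumentation the compiler is more likely to produce such an address for stack objects, which is why the build only breaks occasionally.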
Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/fpu.c | 2 +- arch/s390/kernel/vtime.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 594464f2129d4..0da378e2eb25e 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -23,7 +23,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) if (flags & KERNEL_FPC) /* Save floating point control */ - asm volatile("stfpc %0" : "=m" (state->fpc)); + asm volatile("stfpc %0" : "=Q" (state->fpc)); if (!MACHINE_HAS_VX) { if (flags & KERNEL_VXR_V0V7) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index a69a0911ed0e8..c475ca49cfc6b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -37,7 +37,7 @@ static inline u64 get_vtimer(void) { u64 timer; - asm volatile("stpt %0" : "=m" (timer)); + asm volatile("stpt %0" : "=Q" (timer)); return timer; } @@ -48,7 +48,7 @@ static inline void set_vtimer(u64 expires) asm volatile( " stpt %0\n" /* Store current cpu timer value */ " spt %1" /* Set new value imm. afterwards */ - : "=m" (timer) : "m" (expires)); + : "=Q" (timer) : "Q" (expires)); S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; S390_lowcore.last_update_timer = expires; } @@ -135,8 +135,8 @@ static int do_account_vtime(struct task_struct *tsk) #else " stck %1" /* Store current tod clock value */ #endif - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock)); + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock)); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; -- GitLab From b26e36b7ef36a8a3a147b1609b2505f8a4ecf511 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 17 Apr 2019 16:10:32 +0800 Subject: [PATCH 1239/1861] ALSA: hda/realtek - add two more pin configuration sets to quirk table We have two Dell laptops which have the codec 10ec0236 and 10ec0256 respectively. The headset mic on them can't work; we need to apply the quirk ALC255_FIXUP_DELL1_MIC_NO_PRESENCE. So add their pin configurations to the pin quirk table.
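For reference, a pin quirk table entry applies the named fixup when the codec ID, the PCI subsystem vendor and every listed pin default configuration match; the shape of an entry (values taken from the diff below):

	SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
		{0x21, 0x02211020}),	/* pin NID 0x21: jack default configuration */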
Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8104797660903..f5b510f119edd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7266,6 +7266,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60140}, {0x14, 0x90170150}, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), @@ -7376,6 +7378,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC, {0x14, 0x90170110}, {0x1b, 0x90a70130}, -- GitLab From 8adddf349fda0d3de2f6bb41ddf838cbf36a8ad2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Apr 2019 23:59:02 +1000 Subject: [PATCH 1240/1861] powerpc/mm/radix: Make Radix require HUGETLB_PAGE Joel reported weird crashes using skiroot_defconfig, in his case we jumped into an NX page: kernel tried to execute exec-protected page (c000000002bff4f0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0xc000000002bff4f0 Looking at the disassembly, we had simply branched to that address: c000000000c001bc 49fff335 bl c000000002bff4f0 But that didn't match the original kernel image: c000000000c001bc 4bfff335 bl c000000000bff4f0 When STRICT_KERNEL_RWX is enabled, and we're using the radix MMU, we call radix__change_memory_range() late in boot to change page protections. We do that both to mark rodata read only and also to mark init text no-execute. That involves walking the kernel page tables, and clearing _PAGE_WRITE or _PAGE_EXEC respectively. With radix we may use hugepages for the linear mapping, so the code in radix__change_memory_range() uses eg. pmd_huge() to test if it has found a huge mapping, and if so it stops the page table walk and changes the PMD permissions. However if the kernel is built without HUGETLBFS support, pmd_huge() is just a #define that always returns 0. That causes the code in radix__change_memory_range() to incorrectly interpret the PMD value as a pointer to a PTE page rather than as a PTE at the PMD level. We can see this using `dv` in xmon which also uses pmd_huge(): 0:mon> dv c000000000000000 pgd @ 0xc000000001740000 pgdp @ 0xc000000001740000 = 0x80000000ffffb009 pudp @ 0xc0000000ffffb000 = 0x80000000ffffa009 pmdp @ 0xc0000000ffffa000 = 0xc00000000000018f <- this is a PTE ptep @ 0xc000000000000100 = 0xa64bb17da64ab07d <- kernel text The end result is we treat the value at 0xc000000000000100 as a PTE and clear _PAGE_WRITE or _PAGE_EXEC, potentially corrupting the code at that address. In Joel's specific case we cleared the sign bit in the offset of the branch, causing a backward branch to turn into a forward branch which caused us to branch into a non-executable page. However the exact nature of the crash depends on kernel version, compiler version, and other factors. 
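To make the degenerate stub concrete, a sketch of what the walker effectively sees when CONFIG_HUGETLB_PAGE is not set (update_pmd_protection() is an illustrative name; the real logic is in radix__change_memory_range()):

	#define pmd_huge(x)	0	/* generic fallback, always false */

	if (pmd_huge(*pmdp))				/* constant-folded away */
		update_pmd_protection(pmdp);		/* never reached */
	else
		ptep = pte_offset_kernel(pmdp, addr);	/* treats a leaf PMD as a PTE page */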
We need to fix radix__change_memory_range() to not use accessors that depend on HUGETLBFS, but we also have radix memory hotplug code that uses pmd_huge() etc. that will also need fixing. So for now just disallow the broken combination of Radix with HUGETLBFS disabled. The only defconfig we have that is affected is skiroot_defconfig, so turn on HUGETLBFS there so that it still gets Radix. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Joel Stanley Signed-off-by: Michael Ellerman --- arch/powerpc/configs/skiroot_defconfig | 1 + arch/powerpc/platforms/Kconfig.cputype | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 5ba131c30f6bc..1bcd468ab422d 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -266,6 +266,7 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y +CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS=y diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 842b2c7e156ab..50cd09b4e05d5 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 && HUGETLB_PAGE select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA default y help -- GitLab From 80552f0f7aebdd8deda8ea455292cbfbf462d655 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 16 Apr 2019 10:22:57 -0400 Subject: [PATCH 1241/1861] mm/slab: Remove store_stackinfo() store_stackinfo() does not seem to be used in actual SLAB debugging. Potentially, it could be added to check_poison_obj() to provide more information but this seems like overkill due to the declining popularity of SLAB, so just remove it instead.
Signed-off-by: Qian Cai Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Acked-by: Vlastimil Babka Cc: Andrew Morton Cc: Andy Lutomirski Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Josh Poimboeuf Cc: linux-mm Cc: Pekka Enberg Cc: rientjes@google.com Cc: sean.j.christopherson@intel.com Link: https://lkml.kernel.org/r/20190416142258.18694-1-cai@lca.pw --- mm/slab.c | 48 ++++++------------------------------------------ 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 47a380a486eef..e79ef28396e2a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1467,53 +1467,17 @@ static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) } #ifdef CONFIG_DEBUG_PAGEALLOC -static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, - unsigned long caller) -{ - int size = cachep->object_size; - - addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; - - if (size < 5 * sizeof(unsigned long)) - return; - - *addr++ = 0x12345678; - *addr++ = caller; - *addr++ = smp_processor_id(); - size -= 3 * sizeof(unsigned long); - { - unsigned long *sptr = &caller; - unsigned long svalue; - - while (!kstack_end(sptr)) { - svalue = *sptr++; - if (kernel_text_address(svalue)) { - *addr++ = svalue; - size -= sizeof(unsigned long); - if (size <= sizeof(unsigned long)) - break; - } - } - - } - *addr++ = 0x87654321; -} - -static void slab_kernel_map(struct kmem_cache *cachep, void *objp, - int map, unsigned long caller) +static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map) { if (!is_debug_pagealloc_cache(cachep)) return; - if (caller) - store_stackinfo(cachep, objp, caller); - kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); } #else static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, - int map, unsigned long caller) {} + int map) {} #endif @@ -1661,7 +1625,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); - slab_kernel_map(cachep, objp, 1, 0); + slab_kernel_map(cachep, objp, 1); } if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) @@ -2434,7 +2398,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) /* need to poison the objs? */ if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); - slab_kernel_map(cachep, objp, 0, 0); + slab_kernel_map(cachep, objp, 0); } } #endif @@ -2813,7 +2777,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); - slab_kernel_map(cachep, objp, 0, caller); + slab_kernel_map(cachep, objp, 0); } return objp; } @@ -3077,7 +3041,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, return objp; if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); - slab_kernel_map(cachep, objp, 1, 0); + slab_kernel_map(cachep, objp, 1); poison_obj(cachep, objp, POISON_INUSE); } if (cachep->flags & SLAB_STORE_USER) -- GitLab From 660cf4ce9d0f3497cc7456eaa6d74c8b71d6282c Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:43:01 +0100 Subject: [PATCH 1242/1861] staging: comedi: ni_usb6501: Fix use of uninitialized mutex If `ni6501_auto_attach()` returns an error, the core comedi module code will call `ni6501_detach()` to clean up. 
If `ni6501_auto_attach()` successfully allocated the comedi device private data, `ni6501_detach()` assumes that a `struct mutex mut` contained in the private data has been initialized and uses it. Unfortunately, there are a couple of places where `ni6501_auto_attach()` can return an error after allocating the device private data but before initializing the mutex, so this assumption is invalid. Fix it by initializing the mutex just after allocating the private data in `ni6501_auto_attach()` before any other errors can be returned. Also move the call to `usb_set_intfdata()` just to keep the code a bit neater (either position for the call is fine). I believe this was the cause of the following syzbot crash report: usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0 usb 1-1: config 0 descriptor?? usb 1-1: string descriptor 0 read error: -71 comedi comedi0: Wrong number of endpoints ni6501 1-1:0.233: driver 'ni6501' failed to auto-configure device. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 585 Comm: kworker/0:3 Not tainted 5.1.0-rc4-319354-g9a33b36 #3 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xe8/0x16e lib/dump_stack.c:113 assign_lock_key kernel/locking/lockdep.c:786 [inline] register_lock_class+0x11b8/0x1250 kernel/locking/lockdep.c:1095 __lock_acquire+0xfb/0x37c0 kernel/locking/lockdep.c:3582 lock_acquire+0x10d/0x2f0 kernel/locking/lockdep.c:4211 __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0xfe/0x12b0 kernel/locking/mutex.c:1072 ni6501_detach+0x5b/0x110 drivers/staging/comedi/drivers/ni_usb6501.c:567 comedi_device_detach+0xed/0x800 drivers/staging/comedi/drivers.c:204 comedi_device_cleanup.part.0+0x68/0x140 drivers/staging/comedi/comedi_fops.c:156 comedi_device_cleanup drivers/staging/comedi/comedi_fops.c:187 [inline] comedi_free_board_dev.part.0+0x16/0x90 drivers/staging/comedi/comedi_fops.c:190 comedi_free_board_dev drivers/staging/comedi/comedi_fops.c:189 [inline] comedi_release_hardware_device+0x111/0x140 drivers/staging/comedi/comedi_fops.c:2880 comedi_auto_config.cold+0x124/0x1b0 drivers/staging/comedi/drivers.c:1068 usb_probe_interface+0x31d/0x820 drivers/usb/core/driver.c:361 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_set_configuration+0xdf7/0x1740 drivers/usb/core/message.c:2021 generic_probe+0xa2/0xda drivers/usb/core/generic.c:210 usb_probe_device+0xc0/0x150 drivers/usb/core/driver.c:266 really_probe+0x2da/0xb10 drivers/base/dd.c:509 driver_probe_device+0x21d/0x350 drivers/base/dd.c:671 __device_attach_driver+0x1d8/0x290 drivers/base/dd.c:778 bus_for_each_drv+0x163/0x1e0 drivers/base/bus.c:454 __device_attach+0x223/0x3a0 drivers/base/dd.c:844 bus_probe_device+0x1f1/0x2a0 drivers/base/bus.c:514 device_add+0xad2/0x16e0 drivers/base/core.c:2106 usb_new_device.cold+0x537/0xccf drivers/usb/core/hub.c:2534 hub_port_connect drivers/usb/core/hub.c:5089 [inline] hub_port_connect_change drivers/usb/core/hub.c:5204 [inline] port_event drivers/usb/core/hub.c:5350 [inline]
hub_event+0x138e/0x3b00 drivers/usb/core/hub.c:5432 process_one_work+0x90f/0x1580 kernel/workqueue.c:2269 worker_thread+0x9b/0xe20 kernel/workqueue.c:2415 kthread+0x313/0x420 kernel/kthread.c:253 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Reported-by: syzbot+cf4f2b6c24aff0a3edf6@syzkaller.appspotmail.com Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_usb6501.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index 808ed92ed66fe..ed5e426558216 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -518,6 +518,9 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (!devpriv) return -ENOMEM; + mutex_init(&devpriv->mut); + usb_set_intfdata(intf, devpriv); + ret = ni6501_find_endpoints(dev); if (ret) return ret; @@ -526,9 +529,6 @@ static int ni6501_auto_attach(struct comedi_device *dev, if (ret) return ret; - mutex_init(&devpriv->mut); - usb_set_intfdata(intf, devpriv); - ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; -- GitLab From af4b54a2e5ba18259ff9aac445bf546dd60d037e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 15 Apr 2019 12:43:02 +0100 Subject: [PATCH 1243/1861] staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf `ni6501_alloc_usb_buffers()` is called from `ni6501_auto_attach()` to allocate RX and TX buffers for USB transfers. It allocates `devpriv->usb_rx_buf` followed by `devpriv->usb_tx_buf`. If the allocation of `devpriv->usb_tx_buf` fails, it frees `devpriv->usb_rx_buf`, leaving the pointer dangling, and returns an error. Later, `ni6501_detach()` will be called from the core comedi module code to clean up. `ni6501_detach()` also frees both `devpriv->usb_rx_buf` and `devpriv->usb_tx_buf`, but `devpriv->usb_rx_buf` may have already been freed, leading to a double-free error. Fix it by removing the call to `kfree(devpriv->usb_rx_buf)` from `ni6501_alloc_usb_buffers()`, relying on `ni6501_detach()` to free the memory. Signed-off-by: Ian Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_usb6501.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index ed5e426558216..1bb1cb6513491 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -463,10 +463,8 @@ static int ni6501_alloc_usb_buffers(struct comedi_device *dev) size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } -- GitLab From 7dbcf2b0b770eeb803a416ee8dcbef78e6389d40 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:38 +0200 Subject: [PATCH 1244/1861] x86/irq/64: Limit IST stack overflow check to #DB stack Commit 37fe6a42b343 ("x86: Check stack overflow in detail") added a broad check for the full exception stack area, i.e. it considers the full exception stack area as valid. That's wrong in two aspects: 1) It does not check the individual areas one by one 2) #DF, NMI and #MCE are not enabling interrupts which means that a regular device interrupt cannot happen in their context.
In fact, if a device interrupt hits one of those IST stacks, that's a bug because some code path enabled interrupts while handling the exception. Limit the check to the #DB stack and consider all other IST stacks as 'overflow' or invalid. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Mitsuo Hayasaka Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.682135110@linutronix.de --- arch/x86/kernel/irq_64.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0469cd078db15..b50ac9c7397bb 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,9 +26,18 @@ int sysctl_panic_on_stackoverflow; /* * Probabilistic stack overflow check: * - * Only check the stack in process context, because everything else - * runs on the big interrupt stacks. Checking reliably is too expensive, - * so we just check from interrupts. + * Regular device interrupts can enter on the following stacks: + * + * - User stack + * + * - Kernel task stack + * + * - Interrupt stack if a device driver reenables interrupts + * which should only happen in really old drivers. + * + * - Debug IST stack + * + * All other contexts are invalid. */ static inline void stack_overflow_check(struct pt_regs *regs) { @@ -53,8 +62,8 @@ static inline void stack_overflow_check(struct pt_regs *regs) return; oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; - estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; + estack_bottom = (u64)oist->ist[DEBUG_STACK]; + estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; -- GitLab From fa33215422fd415a07ec2a00e9f1acdaf0fa8e94 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 17:59:39 +0200 Subject: [PATCH 1245/1861] x86/dumpstack: Fix off-by-one errors in stack identification The get_stack_info() function is off-by-one when checking whether an address is on an IRQ stack or an IST stack. This prevents an overflowed IRQ or IST stack from being dumped properly. [ tglx: Do the same for 32-bit ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.785651055@linutronix.de --- arch/x86/kernel/dumpstack_32.c | 4 ++-- arch/x86/kernel/dumpstack_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index cd53f3030e400..d305440ebe9c2 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -41,7 +41,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) * This is a software stack, so 'end' can be a valid stack pointer. * It just means the stack is empty. */ - if (stack <= begin || stack > end) + if (stack < begin || stack > end) return false; info->type = STACK_TYPE_IRQ; @@ -66,7 +66,7 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) * This is a software stack, so 'end' can be a valid stack pointer. * It just means the stack is empty.
*/ - if (stack <= begin || stack > end) + if (stack < begin || stack > end) return false; info->type = STACK_TYPE_SOFTIRQ; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5cdb9e84da57d..90f0fa88cbb39 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -65,7 +65,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) begin = end - (exception_stack_sizes[k] / sizeof(long)); regs = (struct pt_regs *)end - 1; - if (stack <= begin || stack >= end) + if (stack < begin || stack >= end) continue; info->type = STACK_TYPE_EXCEPTION + k; @@ -88,7 +88,7 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info) * This is a software stack, so 'end' can be a valid stack pointer. * It just means the stack is empty. */ - if (stack <= begin || stack > end) + if (stack < begin || stack >= end) return false; info->type = STACK_TYPE_IRQ; -- GitLab From 4f44b8f0b33b7111216f0fad353315f796b81617 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 17:59:40 +0200 Subject: [PATCH 1246/1861] x86/irq/64: Remove a hardcoded irq_stack_union access stack_overflow_check() is using both irq_stack_ptr and irq_stack_union to find the IRQ stack. That's going to break when vmapped irq stacks are introduced. Change it to just use irq_stack_ptr. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.872549191@linutronix.de --- arch/x86/kernel/irq_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index b50ac9c7397bb..f6dcc8fea5c00 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -55,9 +55,8 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + - STACK_TOP_MARGIN; irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); + irq_stack_top = irq_stack_bottom - IRQ_STACK_SIZE + STACK_TOP_MARGIN; if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) return; -- GitLab From df835e7083bee33e98635aca26b39b63ebc6cca7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:41 +0200 Subject: [PATCH 1247/1861] x86/irq/64: Sanitize the top/bottom confusion On x86, stacks go top to bottom, but the stack overflow check uses it the other way round, which is just confusing. Clean it up and sanitize the warning string a bit. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.961241397@linutronix.de --- arch/x86/kernel/irq_64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f6dcc8fea5c00..cf200466d5c8a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -42,7 +42,7 @@ int sysctl_panic_on_stackoverflow; static inline void stack_overflow_check(struct pt_regs *regs) { #ifdef CONFIG_DEBUG_STACKOVERFLOW -#define STACK_TOP_MARGIN 128 +#define STACK_MARGIN 128 struct orig_ist *oist; u64 irq_stack_top, irq_stack_bottom; u64 estack_top, estack_bottom; @@ -51,25 +51,25 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode(regs)) return; - if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && + if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); - irq_stack_top = irq_stack_bottom - IRQ_STACK_SIZE + STACK_TOP_MARGIN; - if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) + irq_stack_top = (u64)__this_cpu_read(irq_stack_ptr); + irq_stack_bottom = irq_stack_top - IRQ_STACK_SIZE + STACK_MARGIN; + if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) return; oist = this_cpu_ptr(&orig_ist); - estack_bottom = (u64)oist->ist[DEBUG_STACK]; - estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; - if (regs->sp >= estack_top && regs->sp <= estack_bottom) + estack_top = (u64)oist->ist[DEBUG_STACK]; + estack_bottom = estack_top - DEBUG_STKSZ + STACK_MARGIN; + if (regs->sp >= estack_bottom && regs->sp <= estack_top) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx, irq stack:%Lx-%Lx, exception stack: %Lx-%Lx, ip:%pF)\n", current->comm, curbase, regs->sp, - irq_stack_top, irq_stack_bottom, - estack_top, estack_bottom, (void *)regs->ip); + irq_stack_bottom, irq_stack_top, + estack_bottom, estack_top, (void *)regs->ip); if (sysctl_panic_on_stackoverflow) panic("low stack detected by irq handler - check messages\n"); -- GitLab From 99d334511b337884cadbdfae28da912a4edb1001 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:42 +0200 Subject: [PATCH 1248/1861] x86/idt: Remove unused macro SISTG Commit d8ba61ba58c8 ("x86/entry/64: Don't use IST entry for #BP stack") removed the last user but left the macro around. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: "H. 
Peter Anvin" Cc: Dou Liyang Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.050689789@linutronix.de --- arch/x86/kernel/idt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 01adea278a710..2877606e97de0 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -45,10 +45,6 @@ struct idt_data { #define ISTG(_vector, _addr, _ist) \ G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) -/* System interrupt gate with interrupt stack */ -#define SISTG(_vector, _addr, _ist) \ - G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS) - /* Task gate */ #define TSKG(_vector, _gdt) \ G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) -- GitLab From 6f36bd8d2e8c221eaaf4ce5b0ebbb11c00b0ac98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:43 +0200 Subject: [PATCH 1249/1861] x86/64: Remove stale CURRENT_MASK Nothing uses that and before people get the wrong ideas, get rid of it. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: "Kirill A. Shutemov" Cc: Andy Lutomirski Cc: Baoquan He Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Qian Cai Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.139284839@linutronix.de --- arch/x86/include/asm/page_64_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599a..bcd8c05186049 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,7 +14,6 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define CURRENT_MASK (~(THREAD_SIZE - 1)) #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -- GitLab From 30842211506e376b76394a9cb4e6d0c9d258b8d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:44 +0200 Subject: [PATCH 1250/1861] x86/exceptions: Remove unused stack defines on 32bit Nothing requires those for 32bit builds. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Michal Hocko Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.227822695@linutronix.de --- arch/x86/include/asm/page_32_types.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0d5c739eebd71..57b5dc15ca7e1 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -22,11 +22,7 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 0 -#define DEBUG_STACK 0 -#define MCE_STACK 0 -#define N_EXCEPTION_STACKS 1 +#define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE /* -- GitLab From 8f34c5b5afce91d171bb0802631197484cb69b8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:45 +0200 Subject: [PATCH 1251/1861] x86/exceptions: Make IST index zero based The defines for the exception stack (IST) array in the TSS are using the SDM convention IST1 - IST7. That causes all sorts of code to subtract 1 for array indices related to IST. That's confusing at best and does not provide any value. 
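Condensed from the diff below, the before/after shape of the convention:

	/* SDM convention, 1-based: every array access needs a -1 */
	#define DEBUG_STACK	3
	... oist->ist[DEBUG_STACK - 1] ...

	/* 0-based: plain array accesses; only the IDT descriptor setup,
	 * where the hardware really is 1-based, adds the 1 back */
	#define ESTACK_DB	2
	... oist->ist[ESTACK_DB] ...
	#define ISTG(_vector, _addr, _ist) \
		G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)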
Make the indices zero based and fixup the usage sites. The only code which needs to adjust the 0 based index is the interrupt descriptor setup which needs to add 1 now. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: Andy Lutomirski Cc: Baoquan He Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: Dou Liyang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: "Kirill A. Shutemov" Cc: Konrad Rzeszutek Wilk Cc: linux-doc@vger.kernel.org Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Qian Cai Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.331772825@linutronix.de --- Documentation/x86/kernel-stacks | 8 ++++---- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/page_64_types.h | 13 ++++++++----- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/dumpstack_64.c | 14 +++++++------- arch/x86/kernel/idt.c | 15 +++++++++------ arch/x86/kernel/irq_64.c | 2 +- arch/x86/mm/fault.c | 2 +- 8 files changed, 34 insertions(+), 28 deletions(-) diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks index 9a0aa4d3a8669..1b04596caea98 100644 --- a/Documentation/x86/kernel-stacks +++ b/Documentation/x86/kernel-stacks @@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt. The currently assigned IST stacks are :- -* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). +* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE). Used for interrupt 8 - Double Fault Exception (#DF). @@ -68,7 +68,7 @@ The currently assigned IST stacks are :- Using a separate stack allows the kernel to recover from it well enough in many cases to still output an oops. -* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). +* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE). Used for non-maskable interrupts (NMI). @@ -76,7 +76,7 @@ The currently assigned IST stacks are :- middle of switching stacks. Using IST for NMI events avoids making assumptions about the previous state of the kernel stack. -* DEBUG_STACK. DEBUG_STKSZ +* ESTACK_DB. DEBUG_STKSZ Used for hardware debug interrupts (interrupt 1) and for software debug interrupts (INT3). @@ -86,7 +86,7 @@ The currently assigned IST stacks are :- avoids making assumptions about the previous state of the kernel stack. -* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE). +* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE). Used for interrupt 18 - Machine Check Exception (#MC). diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b6294..fd0a50452cb35 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -841,7 +841,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /* * Exception entry points. 
*/ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) /** * idtentry - Generate an IDT entry stub @@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=ESTACK_DB idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index bcd8c05186049..6ab2c54c1bf97 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -24,11 +24,14 @@ #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 2 -#define DEBUG_STACK 3 -#define MCE_STACK 4 -#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ +/* + * The index for the tss.ist[] array. The hardware limit is 7 entries. + */ +#define ESTACK_DF 0 +#define ESTACK_NMI 1 +#define ESTACK_DB 2 +#define ESTACK_MCE 3 +#define N_EXCEPTION_STACKS 4 /* * Set __PAGE_OFFSET to the most negative possible address + diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659a..0e4cb718fc4a4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -516,7 +516,7 @@ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); */ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ + [ESTACK_DB] = DEBUG_STKSZ }; #endif @@ -1760,7 +1760,7 @@ void cpu_init(void) estacks += exception_stack_sizes[v]; oist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; - if (v == DEBUG_STACK-1) + if (v == ESTACK_DB) per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; } } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 90f0fa88cbb39..455b47ef92509 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -18,16 +18,16 @@ #include -static char *exception_stack_names[N_EXCEPTION_STACKS] = { - [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ NMI_STACK-1 ] = "NMI", - [ DEBUG_STACK-1 ] = "#DB", - [ MCE_STACK-1 ] = "#MC", +static const char *exception_stack_names[N_EXCEPTION_STACKS] = { + [ ESTACK_DF ] = "#DF", + [ ESTACK_NMI ] = "NMI", + [ ESTACK_DB ] = "#DB", + [ ESTACK_MCE ] = "#MC", }; -static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { +static const unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ + [ESTACK_DB] = DEBUG_STKSZ }; const char *stack_type_name(enum stack_type type) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2877606e97de0..2188f734ec613 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -41,9 +41,12 @@ struct idt_data { #define SYSG(_vector, _addr) \ G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS) -/* Interrupt gate with interrupt stack */ +/* + * Interrupt gate with interrupt stack. The _ist index is the index in + * the tss.ist[] array, but for the descriptor it needs to start at 1. 
+ */ #define ISTG(_vector, _addr, _ist) \ - G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) + G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS) /* Task gate */ #define TSKG(_vector, _gdt) \ @@ -180,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * cpu_init() when the TSS has been initialized. */ static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, debug, DEBUG_STACK), - ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), + ISTG(X86_TRAP_DB, debug, ESTACK_DB), + ISTG(X86_TRAP_NMI, nmi, ESTACK_NMI), + ISTG(X86_TRAP_DF, double_fault, ESTACK_DF), #ifdef CONFIG_X86_MCE - ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), + ISTG(X86_TRAP_MC, &machine_check, ESTACK_MCE), #endif }; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index cf200466d5c8a..182e8b245e063 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -61,7 +61,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) return; oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[DEBUG_STACK]; + estack_top = (u64)oist->ist[ESTACK_DB]; estack_bottom = estack_top - DEBUG_STKSZ + STACK_MARGIN; if (regs->sp >= estack_bottom && regs->sp <= estack_top) return; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 667f1da36208e..0524e1d74f245 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -793,7 +793,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_vmalloc_addr((void *)address) && (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); + unsigned long stack = this_cpu_read(orig_ist.ist[ESTACK_DF]) - sizeof(void *); /* * We're likely to be running with very little stack space * left. It's plausible that we'd hit this condition but -- GitLab From 881a463cf21dbf83aab2cf6c9a359f34f88c2491 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:46 +0200 Subject: [PATCH 1252/1861] x86/cpu_entry_area: Cleanup setup functions No point in retrieving the entry area pointer over and over. Do it once and use unsigned int for 'cpu' everywhere. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.419653165@linutronix.de --- arch/x86/mm/cpu_entry_area.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 19c6abf9ea317..c2a54f75d335f 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } -static void __init percpu_setup_debug_store(int cpu) +static void __init percpu_setup_debug_store(unsigned int cpu) { #ifdef CONFIG_CPU_SUP_INTEL - int npages; + unsigned int npages; void *cea; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) @@ -79,8 +79,9 @@ static void __init percpu_setup_debug_store(int cpu) } /* Setup the fixmap mappings only once per-processor */ -static void __init setup_cpu_entry_area(int cpu) +static void __init setup_cpu_entry_area(unsigned int cpu) { + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); #ifdef CONFIG_X86_64 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ pgprot_t gdt_prot = PAGE_KERNEL_RO; @@ -101,10 +102,9 @@ static void __init setup_cpu_entry_area(int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif - cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), - gdt_prot); + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); - cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + cea_map_percpu_pages(&cea->entry_stack_page, per_cpu_ptr(&entry_stack_storage, cpu), 1, PAGE_KERNEL); @@ -128,19 +128,18 @@ static void __init setup_cpu_entry_area(int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, - &per_cpu(cpu_tss_rw, cpu), + cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); #ifdef CONFIG_X86_32 - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); + per_cpu(cpu_entry_area, cpu) = cea; #endif #ifdef CONFIG_X86_64 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); BUILD_BUG_ON(sizeof(exception_stacks) != sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + cea_map_percpu_pages(&cea->exception_stacks, &per_cpu(exception_stacks, cpu), sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); #endif -- GitLab From 019b17b3ffe48100e52f609ca1c6ed6e5a40cba1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:47 +0200 Subject: [PATCH 1253/1861] x86/exceptions: Add structs for exception stacks At the moment everything assumes a full linear mapping of the various exception stacks. Adding guard pages to the cpu entry area mapping of the exception stacks will break that assumption. As a preparatory step convert both the real storage and the effective mapping in the cpu entry area from character arrays to structures. To ensure that both arrays have the same ordering and the same size of the individual stacks fill the members with a macro. The guard size is the only difference between the two resulting structures. For now both have guard size 0 until the preparation of all usage sites is done. Provide a couple of helper macros which are used in the following conversions. 
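As a rough sketch of the technique (hypothetical names and a fixed 4 KiB stack size for illustration only; the real structs are in the diff below):

#define STACK_MEMBERS(guardsize)	\
	char DF_guard[guardsize];	\
	char DF_stack[4096];		\
	char NMI_guard[guardsize];	\
	char NMI_stack[4096];

/* Physical backing store: densely packed, guard size 0. */
struct stacks_storage { STACK_MEMBERS(0) };

/* Effective mapping: same member list, hence same order and sizes. */
struct stacks_mapping { STACK_MEMBERS(0) };	/* guard size becomes PAGE_SIZE later in the series */

Because both structs expand the same macro, their ordering and per-stack sizes cannot drift apart; only the guard padding will eventually differ.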
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.506807893@linutronix.de --- arch/x86/include/asm/cpu_entry_area.h | 52 ++++++++++++++++++++++++--- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/mm/cpu_entry_area.c | 8 ++--- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 29c7064154435..af8c312673ded 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -7,6 +7,51 @@ #include #include +#ifdef CONFIG_X86_64 + +/* Macro to enforce the same ordering and stack sizes */ +#define ESTACKS_MEMBERS(guardsize) \ + char DF_stack_guard[guardsize]; \ + char DF_stack[EXCEPTION_STKSZ]; \ + char NMI_stack_guard[guardsize]; \ + char NMI_stack[EXCEPTION_STKSZ]; \ + char DB_stack_guard[guardsize]; \ + char DB_stack[DEBUG_STKSZ]; \ + char MCE_stack_guard[guardsize]; \ + char MCE_stack[EXCEPTION_STKSZ]; \ + char IST_top_guard[guardsize]; \ + +/* The exception stacks' physical storage. No guard pages required */ +struct exception_stacks { + ESTACKS_MEMBERS(0) +}; + +/* + * The effective cpu entry area mapping with guard pages. Guard size is + * zero until the code which makes assumptions about linear mappings is + * cleaned up. + */ +struct cea_exception_stacks { + ESTACKS_MEMBERS(0) +}; + +#define CEA_ESTACK_SIZE(st) \ + sizeof(((struct cea_exception_stacks *)0)->st## _stack) + +#define CEA_ESTACK_BOT(ceastp, st) \ + ((unsigned long)&(ceastp)->st## _stack) + +#define CEA_ESTACK_TOP(ceastp, st) \ + (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) + +#define CEA_ESTACK_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack) + +#define CEA_ESTACK_PAGES \ + (sizeof(struct cea_exception_stacks) / PAGE_SIZE) + +#endif + /* * cpu_entry_area is a percpu region that contains things needed by the CPU * and early entry/exit code. Real types aren't used for all fields here @@ -32,12 +77,9 @@ struct cpu_entry_area { #ifdef CONFIG_X86_64 /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. + * Exception stacks used for IST entries with guard pages. 
*/ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; + struct cea_exception_stacks estacks; #endif #ifdef CONFIG_CPU_SUP_INTEL /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0e4cb718fc4a4..24b801ea75221 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1754,7 +1754,7 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!oist->ist[0]) { - char *estacks = get_cpu_entry_area(cpu)->exception_stacks; + char *estacks = (char *)&get_cpu_entry_area(cpu)->estacks; for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index c2a54f75d335f..6a09b84c13fea 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -13,8 +13,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); #ifdef CONFIG_X86_64 -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); #endif struct cpu_entry_area *get_cpu_entry_area(int cpu) @@ -138,9 +137,8 @@ static void __init setup_cpu_entry_area(unsigned int cpu) #ifdef CONFIG_X86_64 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - cea_map_percpu_pages(&cea->exception_stacks, - &per_cpu(exception_stacks, cpu), + sizeof(((struct cpu_entry_area *)0)->estacks)); + cea_map_percpu_pages(&cea->estacks, &per_cpu(exception_stacks, cpu), sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); #endif percpu_setup_debug_store(cpu); -- GitLab From a4af767ae59cc579569bbfe49513a0037d5989ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:48 +0200 Subject: [PATCH 1254/1861] x86/cpu_entry_area: Prepare for IST guard pages To allow guard pages between the IST stacks each stack needs to be mapped individually. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.592691557@linutronix.de --- arch/x86/mm/cpu_entry_area.c | 37 +++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 6a09b84c13fea..2b1407662a6d5 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -77,6 +77,34 @@ static void __init percpu_setup_debug_store(unsigned int cpu) #endif } +#ifdef CONFIG_X86_64 + +#define cea_map_stack(name) do { \ + npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \ + cea_map_percpu_pages(cea->estacks.name## _stack, \ + estacks->name## _stack, npages, PAGE_KERNEL); \ + } while (0) + +static void __init percpu_setup_exception_stacks(unsigned int cpu) +{ + struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu); + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); + unsigned int npages; + + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + /* + * The exceptions stack mappings in the per cpu area are protected + * by guard pages so each stack must be mapped separately. 
+ */ + cea_map_stack(DF); + cea_map_stack(NMI); + cea_map_stack(DB); + cea_map_stack(MCE); +} +#else +static inline void percpu_setup_exception_stacks(unsigned int cpu) {} +#endif + /* Setup the fixmap mappings only once per-processor */ static void __init setup_cpu_entry_area(unsigned int cpu) { @@ -134,13 +162,8 @@ static void __init setup_cpu_entry_area(unsigned int cpu) per_cpu(cpu_entry_area, cpu) = cea; #endif -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); - BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->estacks)); - cea_map_percpu_pages(&cea->estacks, &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); -#endif + percpu_setup_exception_stacks(cpu); + percpu_setup_debug_store(cpu); } -- GitLab From 7623f37e411156e6e09b95cf5c76e509c5fda640 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:49 +0200 Subject: [PATCH 1255/1861] x86/cpu_entry_area: Provide exception stack accessor Store a pointer to the per cpu entry area exception stack mappings to allow fast retrieval. Required for converting various places from using the shadow IST array to directly doing address calculations on the actual mapping address. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.680960459@linutronix.de --- arch/x86/include/asm/cpu_entry_area.h | 4 ++++ arch/x86/mm/cpu_entry_area.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index af8c312673ded..9b406f067ecf4 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -99,6 +99,7 @@ struct cpu_entry_area { #define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); +DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); @@ -118,4 +119,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu) return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } +#define __this_cpu_ist_top_va(name) \ + CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) + #endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 2b1407662a6d5..a00d0d059c8ad 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -14,6 +14,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); +DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); #endif struct cpu_entry_area *get_cpu_entry_area(int cpu) @@ -92,6 +93,9 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) unsigned int npages; BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + + per_cpu(cea_exception_stacks, cpu) = &cea->estacks; + /* * The exceptions stack mappings in the per cpu area are protected * by guard pages so each stack must be mapped separately. 
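A usage sketch of the new accessor (hypothetical caller, shown for illustration; the macro itself is defined in the hunk above):

	/* O(1) retrieval of this CPU's #DF stack top from the cpu entry area: */
	unsigned long df_top = __this_cpu_ist_top_va(DF);

The follow-up patches replace the orig_ist[] based address calculations with exactly this kind of lookup.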
-- GitLab From d876b67343a648f3613506c7dbfed088fa0c875b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:50 +0200 Subject: [PATCH 1256/1861] x86/traps: Use cpu_entry_area instead of orig_ist The orig_ist[] array is a shadow copy of the IST array in the TSS. The reason why it exists is that older kernels used two TSS variants with different pointers into the debug stack. orig_ist[] contains the real starting points. There is no point anymore to do so because the same information can be retrieved using the base address of the cpu entry area mapping and the offsets of the various exception stacks. No functional change. Preparation for removing orig_ist. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.784487230@linutronix.de --- arch/x86/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0524e1d74f245..06c089513d398 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -28,6 +28,7 @@ #include /* vma_pkey() */ #include /* efi_recover_from_page_fault()*/ #include /* store_idt(), ... */ +#include /* exception stack */ #define CREATE_TRACE_POINTS #include @@ -793,7 +794,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_vmalloc_addr((void *)address) && (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - unsigned long stack = this_cpu_read(orig_ist.ist[ESTACK_DF]) - sizeof(void *); + unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *); /* * We're likely to be running with very little stack space * left. It's plausible that we'd hit this condition but -- GitLab From bf5882abab773afd1277415e2f826b21de28f30d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:51 +0200 Subject: [PATCH 1257/1861] x86/irq/64: Use cpu entry area instead of orig_ist The orig_ist[] array is a shadow copy of the IST array in the TSS. The reason why it exists is that older kernels used two TSS variants with different pointers into the debug stack. orig_ist[] contains the real starting points. There is no point anymore to do so because the same information can be retrieved using the base address of the cpu entry area mapping and the offsets of the various exception stacks. No functional change. Preparation for removing orig_ist. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.885741626@linutronix.de --- arch/x86/kernel/irq_64.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 182e8b245e063..7eb6f8d11bfd3 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,8 @@ #include #include #include + +#include #include #include @@ -43,10 +45,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) { #ifdef CONFIG_DEBUG_STACKOVERFLOW #define STACK_MARGIN 128 - struct orig_ist *oist; - u64 irq_stack_top, irq_stack_bottom; - u64 estack_top, estack_bottom; + u64 irq_stack_top, irq_stack_bottom, estack_top, estack_bottom; u64 curbase = (u64)task_stack_page(current); + struct cea_exception_stacks *estacks; if (user_mode(regs)) return; @@ -60,9 +61,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) return; - oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[ESTACK_DB]; - estack_bottom = estack_top - DEBUG_STKSZ + STACK_MARGIN; + estacks = __this_cpu_read(cea_exception_stacks); + estack_top = CEA_ESTACK_TOP(estacks, DB); + estack_bottom = CEA_ESTACK_BOT(estacks, DB) + STACK_MARGIN; if (regs->sp >= estack_bottom && regs->sp <= estack_top) return; -- GitLab From afcd21dad88b68d646e91ed36948117d58b4c197 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:52 +0200 Subject: [PATCH 1258/1861] x86/dumpstack/64: Use cpu_entry_area instead of orig_ist The orig_ist[] array is a shadow copy of the IST array in the TSS. The reason why it exists is that older kernels used two TSS variants with different pointers into the debug stack. orig_ist[] contains the real starting points. There is no point anymore to do so because the same information can be retrieved using the base address of the cpu entry area mapping and the offsets of the various exception stacks. No functional change. Preparation for removing orig_ist. Cc: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.974900463@linutronix.de --- arch/x86/kernel/dumpstack_64.c | 39 +++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 455b47ef92509..f6fbd0438f9e0 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,6 +16,7 @@ #include #include +#include #include static const char *exception_stack_names[N_EXCEPTION_STACKS] = { @@ -25,11 +26,6 @@ static const char *exception_stack_names[N_EXCEPTION_STACKS] = { [ ESTACK_MCE ] = "#MC", }; -static const unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... 
N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [ESTACK_DB] = DEBUG_STKSZ -}; - const char *stack_type_name(enum stack_type type) { BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); @@ -52,25 +48,44 @@ const char *stack_type_name(enum stack_type type) return NULL; } +struct estack_layout { + unsigned int begin; + unsigned int end; +}; + +#define ESTACK_ENTRY(x) { \ + .begin = offsetof(struct cea_exception_stacks, x## _stack), \ + .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ + } + +static const struct estack_layout layout[N_EXCEPTION_STACKS] = { + [ ESTACK_DF ] = ESTACK_ENTRY(DF), + [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), + [ ESTACK_DB ] = ESTACK_ENTRY(DB), + [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), +}; + static bool in_exception_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin, *end; + unsigned long estacks, begin, end, stk = (unsigned long)stack; struct pt_regs *regs; - unsigned k; + unsigned int k; BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); + estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); + for (k = 0; k < N_EXCEPTION_STACKS; k++) { - end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k]; - begin = end - (exception_stack_sizes[k] / sizeof(long)); + begin = estacks + layout[k].begin; + end = estacks + layout[k].end; regs = (struct pt_regs *)end - 1; - if (stack < begin || stack >= end) + if (stk < begin || stk >= end) continue; info->type = STACK_TYPE_EXCEPTION + k; - info->begin = begin; - info->end = end; + info->begin = (unsigned long *)begin; + info->end = (unsigned long *)end; info->next_sp = (unsigned long *)regs->sp; return true; -- GitLab From eb9d7a62c38628ab0ba6e59d22d7cb7930e415d1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 3 Apr 2019 15:12:32 +1100 Subject: [PATCH 1259/1861] powerpc/mm_iommu: Fix potential deadlock Currently mm_iommu_do_alloc() is called in 2 cases: - VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl() for normal memory: this locks &mem_list_mutex and then locks mm::mmap_sem several times when adjusting locked_vm or pinning pages; - vfio_pci_nvgpu_regops::mmap() for GPU memory: this is called with mm::mmap_sem held already and it locks &mem_list_mutex. So one can craft a userspace program to do special ioctl and mmap in 2 threads concurrently and cause a deadlock which lockdep warns about (below). We did not hit this yet because QEMU constructs the machine in a single thread. This moves the overlap check next to where the new entry is added and reduces the amount of time spent with &mem_list_mutex held. This moves locked_vm adjustment from under &mem_list_mutex. This relies on mm_iommu_adjust_locked_vm() doing nothing when entries==0. This is one of the lockdep warnings: ====================================================== WARNING: possible circular locking dependency detected 5.1.0-rc2-le_nv2_aikATfstn1-p1 #363 Not tainted ------------------------------------------------------ qemu-system-ppc/8038 is trying to acquire lock: 000000002ec6c453 (mem_list_mutex){+.+.}, at: mm_iommu_do_alloc+0x70/0x490 but task is already holding lock: 00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++}: lock_acquire+0xf8/0x260 down_write+0x44/0xa0 mm_iommu_adjust_locked_vm.part.1+0x4c/0x190 mm_iommu_do_alloc+0x310/0x490 tce_iommu_ioctl.part.9+0xb84/0x1150 [vfio_iommu_spapr_tce] vfio_fops_unl_ioctl+0x94/0x430 [vfio] do_vfs_ioctl+0xe4/0x930 ksys_ioctl+0xc4/0x110 sys_ioctl+0x28/0x80 system_call+0x5c/0x70 -> #0 (mem_list_mutex){+.+.}: __lock_acquire+0x1484/0x1900 lock_acquire+0xf8/0x260 __mutex_lock+0x88/0xa70 mm_iommu_do_alloc+0x70/0x490 vfio_pci_nvgpu_mmap+0xc0/0x130 [vfio_pci] vfio_pci_mmap+0x198/0x2a0 [vfio_pci] vfio_device_fops_mmap+0x44/0x70 [vfio] mmap_region+0x5d4/0x770 do_mmap+0x42c/0x650 vm_mmap_pgoff+0x124/0x160 ksys_mmap_pgoff+0xdc/0x2f0 sys_mmap+0x40/0x80 system_call+0x5c/0x70 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(mem_list_mutex); lock(&mm->mmap_sem); lock(mem_list_mutex); *** DEADLOCK *** 1 lock held by qemu-system-ppc/8038: #0: 00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160 Fixes: c10c21efa4bc ("powerpc/vfio/iommu/kvm: Do not pin device memory", 2018-12-19) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_iommu.c | 75 +++++++++++++++-------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e7a9c4f6bfca4..9d9be850f8c20 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -95,28 +95,14 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, unsigned long entries, unsigned long dev_hpa, struct mm_iommu_table_group_mem_t **pmem) { - struct mm_iommu_table_group_mem_t *mem; - long i, ret, locked_entries = 0; + struct mm_iommu_table_group_mem_t *mem, *mem2; + long i, ret, locked_entries = 0, pinned = 0; unsigned int pageshift; - mutex_lock(&mem_list_mutex); - - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, - next) { - /* Overlap? */ - if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && - (ua < (mem->ua + - (mem->entries << PAGE_SHIFT)))) { - ret = -EINVAL; - goto unlock_exit; - } - - } - if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) - goto unlock_exit; + return ret; locked_entries = entries; } @@ -150,15 +136,10 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, down_read(&mm->mmap_sem); ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); up_read(&mm->mmap_sem); + pinned = ret > 0 ? ret : 0; if (ret != entries) { - /* free the reference taken */ - for (i = 0; i < ret; i++) - put_page(mem->hpages[i]); - - vfree(mem->hpas); - kfree(mem); ret = -EFAULT; - goto unlock_exit; + goto free_exit; } pageshift = PAGE_SHIFT; @@ -183,21 +164,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } good_exit: - ret = 0; atomic64_set(&mem->mapped, 1); mem->used = 1; mem->ua = ua; mem->entries = entries; - *pmem = mem; - list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); + mutex_lock(&mem_list_mutex); -unlock_exit: - if (locked_entries && ret) - mm_iommu_adjust_locked_vm(mm, locked_entries, false); + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + /* Overlap? 
*/ + if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem2->ua + + (mem2->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + mutex_unlock(&mem_list_mutex); + goto free_exit; + } + } + + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); + *pmem = mem; + + return 0; + +free_exit: + /* free the reference taken */ + for (i = 0; i < pinned; i++) + put_page(mem->hpages[i]); + + vfree(mem->hpas); + kfree(mem); + +unlock_exit: + mm_iommu_adjust_locked_vm(mm, locked_entries, false); + return ret; } @@ -266,7 +269,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - unsigned long entries, dev_hpa; + unsigned long unlock_entries = 0; mutex_lock(&mem_list_mutex); @@ -287,17 +290,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) + unlock_entries = mem->entries; + /* @mapped became 0 so now mappings are disabled, release the region */ - entries = mem->entries; - dev_hpa = mem->dev_hpa; mm_iommu_release(mem); - if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) - mm_iommu_adjust_locked_vm(mm, entries, false); - unlock_exit: mutex_unlock(&mem_list_mutex); + mm_iommu_adjust_locked_vm(mm, unlock_entries, false); + return ret; } EXPORT_SYMBOL_GPL(mm_iommu_put); -- GitLab From 7a3a4d763837d3aa654cd1059030950410c04d77 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 3 Apr 2019 15:12:33 +1100 Subject: [PATCH 1260/1861] powerpc/mm_iommu: Allow pinning large regions When called with vmas_arg==NULL, get_user_pages_longterm() allocates an array of nr_pages*8 which can easily get greater than the max order, for example, registering memory for a 256GB guest does this and fails in __alloc_pages_nodemask(). This adds a loop over chunks of entries to fit the max order limit. Fixes: 678e174c4c16 ("powerpc/mm/iommu: allow migration of cma allocated pages during mm_iommu_do_alloc", 2019-03-05) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_iommu.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 9d9be850f8c20..8330f135294f4 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -98,6 +98,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, struct mm_iommu_table_group_mem_t *mem, *mem2; long i, ret, locked_entries = 0, pinned = 0; unsigned int pageshift; + unsigned long entry, chunk; if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { ret = mm_iommu_adjust_locked_vm(mm, entries, true); @@ -134,11 +135,26 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } down_read(&mm->mmap_sem); - ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); + chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) / + sizeof(struct vm_area_struct *); + chunk = min(chunk, entries); + for (entry = 0; entry < entries; entry += chunk) { + unsigned long n = min(entries - entry, chunk); + + ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n, + FOLL_WRITE, mem->hpages + entry, NULL); + if (ret == n) { + pinned += n; + continue; + } + if (ret > 0) + pinned += ret; + break; + } up_read(&mm->mmap_sem); - pinned = ret > 0 ?
ret : 0; - if (ret != entries) { - ret = -EFAULT; + if (pinned != entries) { + if (!ret) + ret = -EFAULT; goto free_exit; } -- GitLab From f6ef73224a0f0400c3979c8bc68b383f9d2eb9d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:53 +0200 Subject: [PATCH 1261/1861] x86/cpu: Prepare TSS.IST setup for guard pages Convert the TSS.IST setup code to use the cpu entry area information directly instead of assuming a linear mapping of the IST stacks. The store to orig_ist[] is no longer required as there are no users anymore. This is the last preparatory step towards IST guard pages. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.061686012@linutronix.de --- arch/x86/kernel/cpu/common.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 24b801ea75221..4b01b71415f55 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -507,19 +507,6 @@ void load_percpu_segment(int cpu) DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); #endif -#ifdef CONFIG_X86_64 -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [ESTACK_DB] = DEBUG_STKSZ -}; -#endif - /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { @@ -1690,17 +1677,14 @@ static void setup_getcpu(int cpu) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit */ #ifdef CONFIG_X86_64 void cpu_init(void) { - struct orig_ist *oist; + int cpu = raw_smp_processor_id(); struct task_struct *me; struct tss_struct *t; - unsigned long v; - int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); @@ -1715,7 +1699,6 @@ void cpu_init(void) load_ucode_ap(); t = &per_cpu(cpu_tss_rw, cpu); - oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA if (this_cpu_read(numa_node) == 0 && @@ -1753,16 +1736,12 @@ void cpu_init(void) /* * set up and load the per-CPU TSS */ - if (!oist->ist[0]) { - char *estacks = (char *)&get_cpu_entry_area(cpu)->estacks; - - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - estacks += exception_stack_sizes[v]; - oist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - if (v == ESTACK_DB) - per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; - } + if (!t->x86_tss.ist[0]) { + t->x86_tss.ist[ESTACK_DF] = __this_cpu_ist_top_va(DF); + t->x86_tss.ist[ESTACK_NMI] = __this_cpu_ist_top_va(NMI); + t->x86_tss.ist[ESTACK_DB] = __this_cpu_ist_top_va(DB); + t->x86_tss.ist[ESTACK_MCE] = __this_cpu_ist_top_va(MCE); + per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[ESTACK_DB]; } t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; -- GitLab From 4d68c3d0ecd5fcba8876e8a58ac41ffb360de43e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:54 +0200 Subject: [PATCH 1262/1861] x86/cpu: Remove orig_ist array All users gone. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Pingfan Liu Cc: Pu Wen Cc: Sean Christopherson Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.151435667@linutronix.de --- arch/x86/include/asm/processor.h | 9 --------- arch/x86/kernel/cpu/common.c | 6 ------ 2 files changed, 15 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2bb3a648fc12c..8fcfcd1a8375a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -374,16 +374,7 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif -/* - * Save the original ist values for checking stack pointers during debugging - */ -struct orig_ist { - unsigned long ist[7]; -}; - #ifdef CONFIG_X86_64 -DECLARE_PER_CPU(struct orig_ist, orig_ist); - union irq_stack_union { char irq_stack[IRQ_STACK_SIZE]; /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b01b71415f55..8243f198fb7fc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1549,12 +1549,6 @@ void syscall_init(void) X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - static DEFINE_PER_CPU(unsigned long, debug_stack_addr); DEFINE_PER_CPU(int, debug_stack_usage); -- GitLab From b2ecf00631362a83744e5ec249947620db5e240c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 4 Apr 2019 20:53:28 -0400 Subject: [PATCH 1263/1861] vt: fix cursor when clearing the screen The patch a6dbe4427559 ("vt: perform safe console erase in the right order") introduced a bug. The conditional do_update_region() was replaced by a call to update_region() that does contain the conditional already, but with unwanted extra side effects such as restoring the cursor drawing. In order to reproduce the bug: - use framebuffer console with the AMDGPU driver - type "links" to start the console www browser - press 'q' and space to exit links Now the cursor will be permanently visible in the center of the screen. It will stay there until something overwrites it. The bug goes away if we change update_region() back to the conditional do_update_region(). 
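For context, update_region() wraps the raw redraw in cursor handling, roughly like this (a simplified sketch of the vt.c structure, not a verbatim copy):

void update_region(struct vc_data *vc, unsigned long start, int count)
{
	if (con_should_update(vc)) {
		hide_cursor(vc);
		do_update_region(vc, start, count);
		set_cursor(vc);	/* redraws the cursor - the unwanted side effect */
	}
}

Calling it from the erase path therefore repaints the cursor even though the caller only wanted the region redrawn, which is why the fix below goes back to the bare conditional call.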
[ nico: reworded changelog ] Signed-off-by: Mikulas Patocka Reviewed-by: Nicolas Pitre Cc: stable@vger.kernel.org Fixes: a6dbe4427559 ("vt: perform safe console erase in the right order") Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index d34984aa646dc..650c66886c80f 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1520,7 +1520,8 @@ static void csi_J(struct vc_data *vc, int vpar) return; } scr_memsetw(start, vc->vc_video_erase_char, 2 * count); - update_region(vc, (unsigned long) start, count); + if (con_should_update(vc)) + do_update_region(vc, (unsigned long) start, count); vc->vc_need_wrap = 0; } -- GitLab From 3207426925d2b4da390be8068df1d1c2b36e5918 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:55 +0200 Subject: [PATCH 1264/1861] x86/exceptions: Disconnect IST index and stack order The entry order of the TSS.IST array and the order of the stack storage/mapping are not required to be the same. With the upcoming split of the debug stack this is going to fall apart as the number of TSS.IST array entries stays the same while the actual stacks are increasing. Make them separate so that code like dumpstack can just utilize the mapping order. The IST index is solely required for the actual TSS.IST array initialization. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Baoquan He Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: Dou Liyang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: Josh Poimboeuf Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Konrad Rzeszutek Wilk Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Qian Cai Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.241588113@linutronix.de --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/cpu_entry_area.h | 11 +++++++++++ arch/x86/include/asm/page_64_types.h | 9 ++++----- arch/x86/include/asm/stacktrace.h | 2 ++ arch/x86/kernel/cpu/common.c | 10 +++++----- arch/x86/kernel/idt.c | 8 ++++---- 6 files changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index fd0a50452cb35..5c0348504a4bc 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=ESTACK_DB +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 9b406f067ecf4..310eeb62d4184 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -35,6 +35,17 @@ struct cea_exception_stacks { ESTACKS_MEMBERS(0) }; +/* + * The exception stack ordering in [cea_]exception_stacks + */ +enum exception_stack_ordering { + ESTACK_DF, + ESTACK_NMI, + ESTACK_DB, + ESTACK_MCE, + N_EXCEPTION_STACKS +}; + #define CEA_ESTACK_SIZE(st) \ sizeof(((struct cea_exception_stacks *)0)->st## _stack) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 6ab2c54c1bf97..056de887b2208 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -27,11 +27,10 @@ /* * The index for the tss.ist[] 
array. The hardware limit is 7 entries. */ -#define ESTACK_DF 0 -#define ESTACK_NMI 1 -#define ESTACK_DB 2 -#define ESTACK_MCE 3 -#define N_EXCEPTION_STACKS 4 +#define IST_INDEX_DF 0 +#define IST_INDEX_NMI 1 +#define IST_INDEX_DB 2 +#define IST_INDEX_MCE 3 /* * Set __PAGE_OFFSET to the most negative possible address + diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a47..d6d758a187b6c 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -9,6 +9,8 @@ #include #include + +#include #include enum stack_type { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8243f198fb7fc..143aceaf9a9a5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1731,11 +1731,11 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!t->x86_tss.ist[0]) { - t->x86_tss.ist[ESTACK_DF] = __this_cpu_ist_top_va(DF); - t->x86_tss.ist[ESTACK_NMI] = __this_cpu_ist_top_va(NMI); - t->x86_tss.ist[ESTACK_DB] = __this_cpu_ist_top_va(DB); - t->x86_tss.ist[ESTACK_MCE] = __this_cpu_ist_top_va(MCE); - per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[ESTACK_DB]; + t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); + t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); + t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); + t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); + per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[IST_INDEX_DB]; } t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2188f734ec613..6d8917875f44a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -183,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * cpu_init() when the TSS has been initialized. */ static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, debug, ESTACK_DB), - ISTG(X86_TRAP_NMI, nmi, ESTACK_NMI), - ISTG(X86_TRAP_DF, double_fault, ESTACK_DF), + ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), + ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI), + ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), #ifdef CONFIG_X86_MCE - ISTG(X86_TRAP_MC, &machine_check, ESTACK_MCE), + ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE), #endif }; -- GitLab From 1bdb67e5aa2d5d43c48cb7d93393fcba276c9e71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:56 +0200 Subject: [PATCH 1265/1861] x86/exceptions: Enable IST guard pages All usage sites which expected that the exception stacks in the CPU entry area are mapped linearly are fixed up. Enable guard pages between the IST stacks. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Sean Christopherson Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.349862042@linutronix.de --- arch/x86/include/asm/cpu_entry_area.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 310eeb62d4184..9c96406e6d2b8 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -26,13 +26,9 @@ struct exception_stacks { ESTACKS_MEMBERS(0) }; -/* - * The effective cpu entry area mapping with guard pages. Guard size is - * zero until the code which makes assumptions about linear mappings is - * cleaned up. - */ +/* The effective cpu entry area mapping with guard pages. 
*/ struct cea_exception_stacks { - ESTACKS_MEMBERS(0) + ESTACKS_MEMBERS(PAGE_SIZE) }; /* -- GitLab From 2a594d4ccf3f10f80b77d71bd3dad10813ac0137 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:57 +0200 Subject: [PATCH 1266/1861] x86/exceptions: Split debug IST stack The debug IST stack is actually two separate debug stacks to handle #DB recursion. This is required because the CPU starts always at top of stack on exception entry, which means on #DB recursion the second #DB would overwrite the stack of the first. The low level entry code therefore adjusts the top of stack on entry so a secondary #DB starts from a different stack page. But the stack pages are adjacent without a guard page between them. Split the debug stack into 3 stacks which are separated by guard pages. The 3rd stack is never mapped into the cpu_entry_area and is only there to catch triple #DB nesting: --- top of DB_stack <- Initial stack --- end of DB_stack guard page --- top of DB1_stack <- Top of stack after entering first #DB --- end of DB1_stack guard page --- top of DB2_stack <- Top of stack after entering second #DB --- end of DB2_stack guard page If DB2 would not act as the final guard hole, a second #DB would point the top of #DB stack to the stack below #DB1 which would be valid and not catch the not so desired triple nesting. The backing store does not allocate any memory for DB2 and its guard page as it is not going to be mapped into the cpu_entry_area. - Adjust the low level entry code so it adjusts top of #DB with the offset between the stacks instead of exception stack size. - Make the dumpstack code aware of the new stacks. - Adjust the in_debug_stack() implementation and move it into the NMI code where it belongs. As this is NMI hotpath code, it just checks the full area between top of DB_stack and bottom of DB1_stack without checking for the guard page. That's correct because the NMI cannot hit a stackpointer pointing to the guard page between DB and DB1 stack. Even if it would, then the NMI operation still is unaffected, but the resume of the debug exception on the topmost DB stack will crash by touching the guard page. [ bp: Make exception_stack_names static const char * const ] Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: Andy Lutomirski Cc: Baoquan He Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: "Kirill A. 
Shutemov" Cc: Konrad Rzeszutek Wilk Cc: linux-doc@vger.kernel.org Cc: Masahiro Yamada Cc: Peter Zijlstra Cc: Qian Cai Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.439944544@linutronix.de --- Documentation/x86/kernel-stacks | 7 ++++++- arch/x86/entry/entry_64.S | 8 ++++---- arch/x86/include/asm/cpu_entry_area.h | 14 ++++++++++---- arch/x86/include/asm/debugreg.h | 2 -- arch/x86/include/asm/page_64_types.h | 3 --- arch/x86/kernel/asm-offsets_64.c | 2 ++ arch/x86/kernel/cpu/common.c | 11 ----------- arch/x86/kernel/dumpstack_64.c | 12 ++++++++---- arch/x86/kernel/nmi.c | 20 +++++++++++++++++++- arch/x86/mm/cpu_entry_area.c | 4 +++- 10 files changed, 52 insertions(+), 31 deletions(-) diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks index 1b04596caea98..d1bfb0b95ee0b 100644 --- a/Documentation/x86/kernel-stacks +++ b/Documentation/x86/kernel-stacks @@ -76,7 +76,7 @@ The currently assigned IST stacks are :- middle of switching stacks. Using IST for NMI events avoids making assumptions about the previous state of the kernel stack. -* ESTACK_DB. DEBUG_STKSZ +* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE). Used for hardware debug interrupts (interrupt 1) and for software debug interrupts (INT3). @@ -86,6 +86,11 @@ The currently assigned IST stacks are :- avoids making assumptions about the previous state of the kernel stack. + To handle nested #DB correctly there exist two instances of DB stacks. On + #DB entry the IST stackpointer for #DB is switched to the second instance + so a nested #DB starts from a clean stack. The nested #DB switches + the IST stackpointer to a guard hole to catch triple nesting. + * ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE). Used for interrupt 18 - Machine Check Exception (#MC). diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5c0348504a4bc..ee649f1f279ee 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. 
*/ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -925,13 +925,13 @@ ENTRY(\sym) .endif .if \shift_ist != -1 - subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + subq $\ist_offset, CPU_TSS_IST(\shift_ist) .endif call \do_sym .if \shift_ist != -1 - addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + addq $\ist_offset, CPU_TSS_IST(\shift_ist) .endif /* these procedures expect "no swapgs" flag in ebx */ @@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 9c96406e6d2b8..cff3f3f3bfe08 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -10,25 +10,29 @@ #ifdef CONFIG_X86_64 /* Macro to enforce the same ordering and stack sizes */ -#define ESTACKS_MEMBERS(guardsize) \ +#define ESTACKS_MEMBERS(guardsize, db2_holesize)\ char DF_stack_guard[guardsize]; \ char DF_stack[EXCEPTION_STKSZ]; \ char NMI_stack_guard[guardsize]; \ char NMI_stack[EXCEPTION_STKSZ]; \ + char DB2_stack_guard[guardsize]; \ + char DB2_stack[db2_holesize]; \ + char DB1_stack_guard[guardsize]; \ + char DB1_stack[EXCEPTION_STKSZ]; \ char DB_stack_guard[guardsize]; \ - char DB_stack[DEBUG_STKSZ]; \ + char DB_stack[EXCEPTION_STKSZ]; \ char MCE_stack_guard[guardsize]; \ char MCE_stack[EXCEPTION_STKSZ]; \ char IST_top_guard[guardsize]; \ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { - ESTACKS_MEMBERS(0) + ESTACKS_MEMBERS(0, 0) }; /* The effective cpu entry area mapping with guard pages. 
*/ struct cea_exception_stacks { - ESTACKS_MEMBERS(PAGE_SIZE) + ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) }; /* @@ -37,6 +41,8 @@ struct cea_exception_stacks { enum exception_stack_ordering { ESTACK_DF, ESTACK_NMI, + ESTACK_DB2, + ESTACK_DB1, ESTACK_DB, ESTACK_MCE, N_EXCEPTION_STACKS diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9e5ca30738e58..1a8609a15856f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void) { __this_cpu_dec(debug_stack_usage); } -int is_debug_stack(unsigned long addr); void debug_stack_set_zero(void); void debug_stack_reset(void); #else /* !X86_64 */ -static inline int is_debug_stack(unsigned long addr) { return 0; } static inline void debug_stack_set_zero(void) { } static inline void debug_stack_reset(void) { } static inline void debug_stack_usage_inc(void) { } diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 056de887b2208..793c14c372cba 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -18,9 +18,6 @@ #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) -#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) - #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index ddced33184b55..f5281567e28ef 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -68,6 +68,8 @@ int main(void) #undef ENTRY OFFSET(TSS_ist, tss_struct, x86_tss.ist); + DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) - + offsetof(struct cea_exception_stacks, DB1_stack)); BLANK(); #ifdef CONFIG_STACKPROTECTOR diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 143aceaf9a9a5..88cab45707a9a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1549,17 +1549,7 @@ void syscall_init(void) X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } -static DEFINE_PER_CPU(unsigned long, debug_stack_addr); DEFINE_PER_CPU(int, debug_stack_usage); - -int is_debug_stack(unsigned long addr) -{ - return __this_cpu_read(debug_stack_usage) || - (addr <= __this_cpu_read(debug_stack_addr) && - addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); -} -NOKPROBE_SYMBOL(is_debug_stack); - DEFINE_PER_CPU(u32, debug_idt_ctr); void debug_stack_set_zero(void) @@ -1735,7 +1725,6 @@ void cpu_init(void) t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); - per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[IST_INDEX_DB]; } t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index f6fbd0438f9e0..fca97bd3d8ae7 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -19,16 +19,18 @@ #include #include -static const char *exception_stack_names[N_EXCEPTION_STACKS] = { +static const char * const exception_stack_names[] = { [ ESTACK_DF ] = "#DF", [ ESTACK_NMI ] = "NMI", + [ ESTACK_DB2 ] = "#DB2", + [ ESTACK_DB1 ] = "#DB1", [ ESTACK_DB ] = "#DB", [ ESTACK_MCE ] = "#MC", }; const char *stack_type_name(enum stack_type type) { - 
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); + BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); if (type == STACK_TYPE_IRQ) return "IRQ"; @@ -58,9 +60,11 @@ struct estack_layout { .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ } -static const struct estack_layout layout[N_EXCEPTION_STACKS] = { +static const struct estack_layout layout[] = { [ ESTACK_DF ] = ESTACK_ENTRY(DF), [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), + [ ESTACK_DB2 ] = { .begin = 0, .end = 0}, + [ ESTACK_DB1 ] = ESTACK_ENTRY(DB1), [ ESTACK_DB ] = ESTACK_ENTRY(DB), [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), }; @@ -71,7 +75,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) struct pt_regs *regs; unsigned int k; - BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); + BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 18bc9b51ac9b9..3755d0310026a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -21,13 +21,14 @@ #include #include #include +#include #include #if defined(CONFIG_EDAC) #include #endif -#include +#include #include #include #include @@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2); * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); + +static bool notrace is_debug_stack(unsigned long addr) +{ + struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks); + unsigned long top = CEA_ESTACK_TOP(cs, DB); + unsigned long bot = CEA_ESTACK_BOT(cs, DB1); + + if (__this_cpu_read(debug_stack_usage)) + return true; + /* + * Note, this covers the guard page between DB and DB1 as well to + * avoid two checks. But by all means @addr can never point into + * the guard page. + */ + return addr >= bot && addr < top; +} +NOKPROBE_SYMBOL(is_debug_stack); #endif dotraplinkage notrace void diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index a00d0d059c8ad..752ad11d68682 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -98,10 +98,12 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) /* * The exceptions stack mappings in the per cpu area are protected - * by guard pages so each stack must be mapped separately. + * by guard pages so each stack must be mapped separately. DB2 is + * not mapped; it just exists to catch triple nesting of #DB. */ cea_map_stack(DF); cea_map_stack(NMI); + cea_map_stack(DB1); cea_map_stack(DB); cea_map_stack(MCE); } -- GitLab From c450c8f532b63475b30e29bc600c25ab0a4ab282 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:58 +0200 Subject: [PATCH 1267/1861] x86/dumpstack/64: Speedup in_exception_stack() The current implementation of in_exception_stack() iterates over the exception stacks array. Most of the time this is a useless exercise, but even for the actual use cases (perf and ftrace) it takes at least 2 iterations to get to the NMI stack. As the exception stacks and the guard pages are page aligned the loop can be avoided completely. Add an initial check whether the stack pointer is inside the full exception stack area and leave early if not. Create a lookup table which describes the stack area. The table index is the page offset from the beginning of the exception stacks. So for any given stack pointer the page offset is computed and a lookup in the description table is performed. If it is inside a guard page, return. If not, use the descriptor to fill in the info structure.
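For intuition, a sketch of the resulting lookup with hypothetical numbers (4 KiB pages assumed; the real table and types are in the diff below):

	/* A stack pointer 0x9100 bytes into the exception stack area: */
	k = (stk - begin) >> PAGE_SHIFT;	/* k == 9 */
	ep = &estack_pages[k];			/* page descriptor built at compile time */
	if (!ep->size)				/* size 0 marks a guard page: bail out */
		return false;			/* otherwise ep->offs/ep->size bound the owning stack */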
The table is filled at compile time and for the !KASAN case the interesting page descriptors exactly fit into a single cache line. Just the last guard page descriptor is in the next cacheline, but that should not be accessed in the regular case. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Josh Poimboeuf Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.543320386@linutronix.de --- arch/x86/kernel/dumpstack_64.c | 82 +++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index fca97bd3d8ae7..f356d3ea0c706 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -50,52 +50,72 @@ const char *stack_type_name(enum stack_type type) return NULL; } -struct estack_layout { - unsigned int begin; - unsigned int end; +/** + * struct estack_pages - Page descriptor for exception stacks + * @offs: Offset from the start of the exception stack area + * @size: Size of the exception stack + * @type: Type to store in the stack_info struct + */ +struct estack_pages { + u32 offs; + u16 size; + u16 type; }; -#define ESTACK_ENTRY(x) { \ - .begin = offsetof(struct cea_exception_stacks, x## _stack), \ - .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ - } +#define EPAGERANGE(st) \ + [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \ + PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \ + .offs = CEA_ESTACK_OFFS(st), \ + .size = CEA_ESTACK_SIZE(st), \ + .type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, } -static const struct estack_layout layout[] = { - [ ESTACK_DF ] = ESTACK_ENTRY(DF), - [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), - [ ESTACK_DB2 ] = { .begin = 0, .end = 0}, - [ ESTACK_DB1 ] = ESTACK_ENTRY(DB1), - [ ESTACK_DB ] = ESTACK_ENTRY(DB), - [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), +/* + * Array of exception stack page descriptors. If the stack is larger than + * PAGE_SIZE, all pages covering a particular stack will have the same + * info. The guard pages including the not mapped DB2 stack are zeroed + * out. + */ +static const +struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = { + EPAGERANGE(DF), + EPAGERANGE(NMI), + EPAGERANGE(DB1), + EPAGERANGE(DB), + EPAGERANGE(MCE), }; static bool in_exception_stack(unsigned long *stack, struct stack_info *info) { - unsigned long estacks, begin, end, stk = (unsigned long)stack; + unsigned long begin, end, stk = (unsigned long)stack; + const struct estack_pages *ep; struct pt_regs *regs; unsigned int k; BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); - estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); - - for (k = 0; k < N_EXCEPTION_STACKS; k++) { - begin = estacks + layout[k].begin; - end = estacks + layout[k].end; - regs = (struct pt_regs *)end - 1; + begin = (unsigned long)__this_cpu_read(cea_exception_stacks); + end = begin + sizeof(struct cea_exception_stacks); + /* Bail if @stack is outside the exception stack area. */ + if (stk < begin || stk >= end) + return false; - if (stk < begin || stk >= end) - continue; + /* Calc page offset from start of exception stacks */ + k = (stk - begin) >> PAGE_SHIFT; + /* Lookup the page descriptor */ + ep = &estack_pages[k]; + /* Guard page? 
*/ + if (!ep->size) + return false; - info->type = STACK_TYPE_EXCEPTION + k; - info->begin = (unsigned long *)begin; - info->end = (unsigned long *)end; - info->next_sp = (unsigned long *)regs->sp; + begin += (unsigned long)ep->offs; + end = begin + (unsigned long)ep->size; + regs = (struct pt_regs *)end - 1; - return true; - } - - return false; + info->type = ep->type; + info->begin = (unsigned long *)begin; + info->end = (unsigned long *)end; + info->next_sp = (unsigned long *)regs->sp; + return true; } static bool in_irq_stack(unsigned long *stack, struct stack_info *info) -- GitLab From aa641c287b2f7676f6f0064a8351daf08eca6b0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:59 +0200 Subject: [PATCH 1268/1861] x86/irq/32: Define IRQ_STACK_SIZE On 32-bit IRQ_STACK_SIZE is the same as THREAD_SIZE. To allow sharing struct irq_stack with 32-bit, define IRQ_STACK_SIZE for 32-bit and use it for struct irq_stack. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Michal Hocko Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Suravee Suthikulpanit Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.632513987@linutronix.de --- arch/x86/include/asm/page_32_types.h | 2 ++ arch/x86/include/asm/processor.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 57b5dc15ca7e1..565ad755c785e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -22,6 +22,8 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define IRQ_STACK_SIZE THREAD_SIZE + #define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8fcfcd1a8375a..8b4bf732e1b54 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -422,8 +422,8 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); * per-CPU IRQ handling stacks */ struct irq_stack { - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __aligned(THREAD_SIZE); + u32 stack[IRQ_STACK_SIZE / sizeof(u32)]; +} __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); DECLARE_PER_CPU(struct irq_stack *, softirq_stack); -- GitLab From 231c4846b106d526fa212b02b37447d3f2fcc99d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:00 +0200 Subject: [PATCH 1269/1861] x86/irq/32: Make irq stack a character array There is no reason to have an u32 array in struct irq_stack. The only purpose of the array is to size the struct properly. Preparatory change for sharing struct irq_stack with 64-bit. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Andy Lutomirski Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Nick Desaulniers Cc: Pingfan Liu Cc: Pu Wen Cc: Sean Christopherson Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.736241969@linutronix.de --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8b4bf732e1b54..ff3f469ab2d45 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -422,7 +422,7 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); * per-CPU IRQ handling stacks */ struct irq_stack { - u32 stack[IRQ_STACK_SIZE / sizeof(u32)]; + char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); -- GitLab From a754fe2b76d1d6bce7069657bba975034f3ad961 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:01 +0200 Subject: [PATCH 1270/1861] x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr The percpu storage holds a pointer to the stack not the stack itself. Rename it before sharing struct irq_stack with 64-bit. No functional changes. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Nick Desaulniers Cc: Nicolai Stange Cc: Sean Christopherson Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.824805922@linutronix.de --- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/dumpstack_32.c | 4 ++-- arch/x86/kernel/irq_32.c | 19 ++++++++++--------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ff3f469ab2d45..0b7d866a1ad53 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -425,8 +425,8 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); -DECLARE_PER_CPU(struct irq_stack *, softirq_stack); +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ extern unsigned int fpu_kernel_xstate_size; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index d305440ebe9c2..64a59d7266395 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -34,7 +34,7 @@ const char *stack_type_name(enum stack_type type) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack); + unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* @@ -59,7 +59,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack); + unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 95600a99ae936..f37489c806faf 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; } static inline void 
print_stack_overflow(void) { } #endif -DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); -DEFINE_PER_CPU(struct irq_stack *, softirq_stack); +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr); static void call_on_stack(void *func, void *stack) { @@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); - irqstk = __this_cpu_read(hardirq_stack); + irqstk = __this_cpu_read(hardirq_stack_ptr); /* * this is where we switch to the IRQ stack. However, if we are @@ -113,21 +113,22 @@ void irq_ctx_init(int cpu) { struct irq_stack *irqstk; - if (per_cpu(hardirq_stack, cpu)) + if (per_cpu(hardirq_stack_ptr, cpu)) return; irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - per_cpu(hardirq_stack, cpu) = irqstk; + per_cpu(hardirq_stack_ptr, cpu) = irqstk; irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - per_cpu(softirq_stack, cpu) = irqstk; + per_cpu(softirq_stack_ptr, cpu) = irqstk; - printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); + pr_debug("CPU %u irqstacks, hard=%p soft=%p\n", + cpu, per_cpu(hardirq_stack_ptr, cpu), + per_cpu(softirq_stack_ptr, cpu)); } void do_softirq_own_stack(void) @@ -135,7 +136,7 @@ void do_softirq_own_stack(void) struct irq_stack *irqstk; u32 *isp, *prev_esp; - irqstk = __this_cpu_read(softirq_stack); + irqstk = __this_cpu_read(softirq_stack_ptr); /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); -- GitLab From 758a2e312228410f2f5092ade558109e93dc3ee8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:02 +0200 Subject: [PATCH 1271/1861] x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr Preparatory patch to share code with 32bit. No functional changes. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Michal Hocko Cc: Mike Rapoport Cc: Nick Desaulniers Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Pingfan Liu Cc: Sean Christopherson Cc: Stephen Rothwell Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.912584074@linutronix.de --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- arch/x86/kernel/setup_percpu.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ee649f1f279ee..726abbe6c6d84 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -431,7 +431,7 @@ END(irq_entries_start) */ movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) - movq PER_CPU_VAR(irq_stack_ptr), %rsp + movq PER_CPU_VAR(hardirq_stack_ptr), %rsp #ifdef CONFIG_DEBUG_ENTRY /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b7d866a1ad53..5e3dd4e2136dd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -396,7 +396,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); } -DECLARE_PER_CPU(char *, irq_stack_ptr); +DECLARE_PER_CPU(char *, hardirq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 88cab45707a9a..13ec72bb8f368 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1510,7 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, irq_stack_ptr) = +DEFINE_PER_CPU(char *, hardirq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index f356d3ea0c706..753b8cfe8b8a2 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -120,7 +120,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) static bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr); + unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 7eb6f8d11bfd3..f0c7356c89699 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -56,7 +56,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)__this_cpu_read(irq_stack_ptr); + irq_stack_top = (u64)__this_cpu_read(hardirq_stack_ptr); irq_stack_bottom = irq_stack_top - IRQ_STACK_SIZE + STACK_MARGIN; if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) return; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4bf46575568a2..657343ecc2da4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -245,7 +245,7 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); #endif #ifdef CONFIG_X86_64 - per_cpu(irq_stack_ptr, cpu) = + 
per_cpu(hardirq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE; #endif -- GitLab From 451f743a64e1cf979f5fe21a1b2a015feb559f72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:03 +0200 Subject: [PATCH 1272/1861] x86/irq/32: Invoke irq_ctx_init() from init_IRQ() irq_ctx_init() is invoked from native_init_IRQ() or from xen_init_IRQ() code. There is no reason to have this split. The interrupt stacks must be allocated no matter what. Invoke it from init_IRQ() before invoking the native or XEN init implementation. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Abraham Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Mike Rapoport Cc: Nicolai Stange Cc: Sean Christopherson Cc: Stefano Stabellini Cc: Stephen Rothwell Cc: x86-ml Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20190414160146.001162606@linutronix.de --- arch/x86/kernel/irqinit.c | 4 ++-- drivers/xen/events/events_base.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a0693b71cfc1c..26b5cb5386b9b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -91,6 +91,8 @@ void __init init_IRQ(void) for (i = 0; i < nr_legacy_irqs(); i++) per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); + irq_ctx_init(smp_processor_id()); + x86_init.irqs.intr_init(); } @@ -104,6 +106,4 @@ void __init native_init_IRQ(void) if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); - - irq_ctx_init(smp_processor_id()); } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 117e76b2f9391..084e45882c737 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1687,7 +1687,6 @@ void __init xen_init_IRQ(void) #ifdef CONFIG_X86 if (xen_pv_domain()) { - irq_ctx_init(smp_processor_id()); if (xen_initial_domain()) pci_xen_initial_domain(); } -- GitLab From 66c7ceb47f628c8bd4f84a6d01c2725ded6a342d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:04 +0200 Subject: [PATCH 1273/1861] x86/irq/32: Handle irq stack allocation failure proper irq_ctx_init() crashes hard on page allocation failures. While that's ok during early boot, it's just wrong in the CPU hotplug bringup code. Check the page allocation failure and return -ENOMEM and handle it at the call sites. On early boot the only way out is to BUG(), but on CPU hotplug there is no reason to crash, so just abort the operation. Rename the function to something more sensible while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alison Schofield Cc: Andrew Morton Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Boris Ostrovsky Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Nicolai Stange Cc: Pu Wen Cc: Sean Christopherson Cc: Shaokun Zhang Cc: Stefano Stabellini Cc: Suravee Suthikulpanit Cc: x86-ml Cc: xen-devel@lists.xenproject.org Cc: Yazen Ghannam Cc: Yi Wang Cc: Zhenzhong Duan Link: https://lkml.kernel.org/r/20190414160146.089060584@linutronix.de --- arch/x86/include/asm/irq.h | 4 ++-- arch/x86/include/asm/smp.h | 2 +- arch/x86/kernel/irq_32.c | 32 ++++++++++++++++---------------- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/smpboot.c | 15 ++++++++++++--- arch/x86/xen/smp_pv.c | 4 +++- 6 files changed, 35 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index fbb16e6b6c18b..d751e8440a6bb 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -17,9 +17,9 @@ static inline int irq_canonicalize(int irq) } #ifdef CONFIG_X86_32 -extern void irq_ctx_init(int cpu); +extern int irq_init_percpu_irqstack(unsigned int cpu); #else -# define irq_ctx_init(cpu) do { } while (0) +static inline int irq_init_percpu_irqstack(unsigned int cpu) { return 0; } #endif #define __ARCH_HAS_DO_SOFTIRQ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2e95b6c1bca3f..da545df207b2a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); -void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f37489c806faf..fc34816c6f044 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -107,28 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) } /* - * allocate per-cpu stacks for hardirq and for softirq processing + * Allocate per-cpu stacks for hardirq and softirq processing */ -void irq_ctx_init(int cpu) +int irq_init_percpu_irqstack(unsigned int cpu) { - struct irq_stack *irqstk; + int node = cpu_to_node(cpu); + struct page *ph, *ps; if (per_cpu(hardirq_stack_ptr, cpu)) - return; - - irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), - THREADINFO_GFP, - THREAD_SIZE_ORDER)); - per_cpu(hardirq_stack_ptr, cpu) = irqstk; + return 0; - irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), - THREADINFO_GFP, - THREAD_SIZE_ORDER)); - per_cpu(softirq_stack_ptr, cpu) = irqstk; + ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + if (!ph) + return -ENOMEM; + ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + if (!ps) { + __free_pages(ph, THREAD_SIZE_ORDER); + return -ENOMEM; + } - pr_debug("CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_stack_ptr, cpu), - per_cpu(softirq_stack_ptr, cpu)); + per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); + per_cpu(softirq_stack_ptr, cpu) = page_address(ps); + return 0; } void do_softirq_own_stack(void) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 26b5cb5386b9b..16919a9671fa9 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -91,7 +91,7 @@ void __init init_IRQ(void) for (i = 0; i < nr_legacy_irqs(); i++) 
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); - irq_ctx_init(smp_processor_id()); + BUG_ON(irq_init_percpu_irqstack(smp_processor_id())); x86_init.irqs.intr_init(); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ce1a67b70168e..c92b21f9e9dc6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -935,20 +935,27 @@ out: return boot_error; } -void common_cpu_up(unsigned int cpu, struct task_struct *idle) +int common_cpu_up(unsigned int cpu, struct task_struct *idle) { + int ret; + /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); per_cpu(current_task, cpu) = idle; + /* Initialize the interrupt stack(s) */ + ret = irq_init_percpu_irqstack(cpu); + if (ret) + return ret; + #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - irq_ctx_init(cpu); per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif + return 0; } /* @@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) /* the FPU context is blank, nobody can own it */ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; - common_cpu_up(cpu, tidle); + err = common_cpu_up(cpu, tidle); + if (err) + return err; err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); if (err) { diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 145506f9fdbe1..590fcf8630060 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) { int rc; - common_cpu_up(cpu, idle); + rc = common_cpu_up(cpu, idle); + if (rc) + return rc; xen_setup_runstate_info(cpu); -- GitLab From 0ac26104208450d35c4e68754ce0c67b3a4d7802 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:05 +0200 Subject: [PATCH 1274/1861] x86/irq/64: Init hardirq_stack_ptr during CPU hotplug Preparatory change for disentangling the irq stack union as a prerequisite for irq stacks with guard pages. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Nicolai Stange Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Cc: Yi Wang Link: https://lkml.kernel.org/r/20190414160146.177558566@linutronix.de --- arch/x86/include/asm/irq.h | 4 ---- arch/x86/kernel/cpu/common.c | 4 +--- arch/x86/kernel/irq_64.c | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index d751e8440a6bb..8f95686ec27e0 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq) return ((irq == 2) ? 
9 : irq); } -#ifdef CONFIG_X86_32 extern int irq_init_percpu_irqstack(unsigned int cpu); -#else -static inline int irq_init_percpu_irqstack(unsigned int cpu) { return 0; } -#endif #define __ARCH_HAS_DO_SOFTIRQ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 13ec72bb8f368..1222080838dac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1510,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, hardirq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; - +DEFINE_PER_CPU(char *, hardirq_stack_ptr); DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f0c7356c89699..c0bea0d7d76ae 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -87,3 +87,18 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) generic_handle_irq_desc(desc); return true; } + +static int map_irq_stack(unsigned int cpu) +{ + void *va = per_cpu_ptr(irq_stack_union.irq_stack, cpu); + + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; + return 0; +} + +int irq_init_percpu_irqstack(unsigned int cpu) +{ + if (per_cpu(hardirq_stack_ptr, cpu)) + return 0; + return map_irq_stack(cpu); +} -- GitLab From e6401c13093173aad709a5c6de00cf8d692ee786 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 18:00:06 +0200 Subject: [PATCH 1275/1861] x86/irq/64: Split the IRQ stack into its own pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the IRQ stack is hardcoded as the first page of the percpu area, and the stack canary lives on the IRQ stack. The former gets in the way of adding an IRQ stack guard page, and the latter is a potential weakness in the stack canary mechanism. Split the IRQ stack into its own private percpu pages. [ tglx: Make 64 and 32 bit share struct irq_stack ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Boris Ostrovsky Cc: Brijesh Singh Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: Feng Tang Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jan Beulich Cc: Jiri Kosina Cc: Joerg Roedel Cc: Jordan Borgner Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Maran Wilson Cc: Masahiro Yamada Cc: Michal Hocko Cc: Mike Rapoport Cc: Nick Desaulniers Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Pu Wen Cc: "Rafael Ávila de Espíndola" Cc: Sean Christopherson Cc: Stefano Stabellini Cc: Vlastimil Babka Cc: x86-ml Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20190414160146.267376656@linutronix.de --- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/processor.h | 32 ++++++++++++--------------- arch/x86/include/asm/stackprotector.h | 6 ++--- arch/x86/kernel/asm-offsets_64.c | 2 +- arch/x86/kernel/cpu/common.c | 8 +++---- arch/x86/kernel/head_64.S | 2 +- arch/x86/kernel/irq_64.c | 5 ++++- arch/x86/kernel/setup_percpu.c | 5 ----- arch/x86/kernel/vmlinux.lds.S | 7 +++--- arch/x86/tools/relocs.c | 2 +- arch/x86/xen/xen-head.S | 10 ++++----- 11 files changed, 39 insertions(+), 44 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 726abbe6c6d84..cfe4d6ea258dc 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -298,7 +298,7 @@ ENTRY(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif #ifdef CONFIG_RETPOLINE @@ -430,7 +430,7 @@ END(irq_entries_start) * it before we actually move ourselves to the IRQ stack. */ - movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) + movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8) movq PER_CPU_VAR(hardirq_stack_ptr), %rsp #ifdef CONFIG_DEBUG_ENTRY diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5e3dd4e2136dd..7e99ef67bff08 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); #define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) +/* Per CPU interrupt stacks */ +struct irq_stack { + char stack[IRQ_STACK_SIZE]; +} __aligned(IRQ_STACK_SIZE); + +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); + #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #else @@ -375,28 +382,24 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #endif #ifdef CONFIG_X86_64 -union irq_stack_union { - char irq_stack[IRQ_STACK_SIZE]; +struct fixed_percpu_data { /* * GCC hardcodes the stack canary as %gs:40. Since the * irq_stack is the object at %gs:0, we reserve the bottom * 48 bytes of the irq stack for the canary. 
*/ - struct { - char gs_base[40]; - unsigned long stack_canary; - }; + char gs_base[40]; + unsigned long stack_canary; }; -DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; -DECLARE_INIT_PER_CPU(irq_stack_union); +DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; +DECLARE_INIT_PER_CPU(fixed_percpu_data); static inline unsigned long cpu_kernelmode_gs_base(int cpu) { - return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); + return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } -DECLARE_PER_CPU(char *, hardirq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); @@ -418,14 +421,7 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif -/* - * per-CPU IRQ handling stacks - */ -struct irq_stack { - char stack[IRQ_STACK_SIZE]; -} __aligned(IRQ_STACK_SIZE); - -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +/* Per CPU softirq stack pointer */ DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 8ec97a62c2451..91e29b6a86a5e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -13,7 +13,7 @@ * On x86_64, %gs is shared by percpu area and stack canary. All * percpu symbols are zero based and %gs points to the base of percpu * area. The first occupant of the percpu area is always - * irq_stack_union which contains stack_canary at offset 40. Userland + * fixed_percpu_data which contains stack_canary at offset 40. Userland * %gs is always saved and restored on kernel entry and exit using * swapgs, so stack protector doesn't add any complexity there. * @@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void) u64 tsc; #ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); + BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); #endif /* * We both use the random pool and the current TSC as a source @@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void) current->stack_canary = canary; #ifdef CONFIG_X86_64 - this_cpu_write(irq_stack_union.stack_canary, canary); + this_cpu_write(fixed_percpu_data.stack_canary, canary); #else this_cpu_write(stack_canary.canary, canary); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f5281567e28ef..d3d075226c0aa 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -73,7 +73,7 @@ int main(void) BLANK(); #ifdef CONFIG_STACKPROTECTOR - DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); + DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); BLANK(); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1222080838dac..801c6f040faa7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1498,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg) __setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 -DEFINE_PER_CPU_FIRST(union irq_stack_union, - irq_stack_union) __aligned(PAGE_SIZE) __visible; -EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); +DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, + fixed_percpu_data) __aligned(PAGE_SIZE) __visible; +EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); /* * The following percpu variables are hot. 
Align current_task to @@ -1510,7 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, hardirq_stack_ptr); +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d1dbe8e4eb824..bcd206c8ac900 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -265,7 +265,7 @@ ENDPROC(start_cpu0) GLOBAL(initial_code) .quad x86_64_start_kernel GLOBAL(initial_gs) - .quad INIT_PER_CPU_VAR(irq_stack_union) + .quad INIT_PER_CPU_VAR(fixed_percpu_data) GLOBAL(initial_stack) /* * The SIZEOF_PTREGS gap is a convention which helps the in-kernel diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c0bea0d7d76ae..c0f89d136b803 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -23,6 +23,9 @@ #include #include +DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; +DECLARE_INIT_PER_CPU(irq_stack_backing_store); + int sysctl_panic_on_stackoverflow; /* @@ -90,7 +93,7 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) static int map_irq_stack(unsigned int cpu) { - void *va = per_cpu_ptr(irq_stack_union.irq_stack, cpu); + void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; return 0; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 657343ecc2da4..86663874ef042 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void) per_cpu(x86_cpu_to_logical_apicid, cpu) = early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); #endif -#ifdef CONFIG_X86_64 - per_cpu(hardirq_stack_ptr, cpu) = - per_cpu(irq_stack_union.irq_stack, cpu) + - IRQ_STACK_SIZE; -#endif #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bad8c51fee6ee..a5af9a7c4be47 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -403,7 +403,8 @@ SECTIONS */ #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_union); +INIT_PER_CPU(fixed_percpu_data); +INIT_PER_CPU(irq_stack_backing_store); /* * Build-time check on the image size: @@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union); "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. 
= ASSERT((fixed_percpu_data == 0), + "fixed_percpu_data is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index b629f6992d9f6..efa483205e436 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -738,7 +738,7 @@ static void percpu_init(void) * __per_cpu_load * * The "gold" linker incorrectly associates: - * init_per_cpu__irq_stack_union + * init_per_cpu__fixed_percpu_data * init_per_cpu__gdt_page */ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 5077ead5e59ca..c1d8b90aa4e2d 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -40,13 +40,13 @@ ENTRY(startup_xen) #ifdef CONFIG_X86_64 /* Set up %gs. * - * The base of %gs always points to the bottom of the irqstack - * union. If the stack protector canary is enabled, it is - * located at %gs:40. Note that, on SMP, the boot cpu uses - * init data section till per cpu areas are set up. + * The base of %gs always points to fixed_percpu_data. If the + * stack protector canary is enabled, it is located at %gs:40. + * Note that, on SMP, the boot cpu uses init data section until + * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx - movq $INIT_PER_CPU_VAR(irq_stack_union),%rax + movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax cdq wrmsr #endif -- GitLab From 18b7a6bef62de1d598fbff23b52114b7775ecf00 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 18:00:07 +0200 Subject: [PATCH 1276/1861] x86/irq/64: Remap the IRQ stack with guard pages The IRQ stack lives in percpu space, so an IRQ handler that overflows it will overwrite other data structures. Use vmap() to remap the IRQ stack so that it will have the usual guard pages that vmap()/vmalloc() allocations have. With this, the kernel will panic immediately on an IRQ stack overflow. [ tglx: Move the map code to a proper place and invoke it only when a CPU is about to be brought online. No point in installing the map at early boot for all possible CPUs. Fail the CPU bringup if the vmap() fails as done for all other preparatory stages in CPU hotplug. ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160146.363733568@linutronix.de --- arch/x86/kernel/irq_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c0f89d136b803..f107eb2021f60 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -91,6 +91,35 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) return true; } +#ifdef CONFIG_VMAP_STACK +/* + * VMAP the backing store with guard pages + */ +static int map_irq_stack(unsigned int cpu) +{ + char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu); + struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE]; + void *va; + int i; + + for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) { + phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT)); + + pages[i] = pfn_to_page(pa >> PAGE_SHIFT); + } + + va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + if (!va) + return -ENOMEM; + + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; + return 0; +} +#else +/* + * If VMAP stacks are disabled due to KASAN, just use the per cpu + * backing store without guard pages. + */ static int map_irq_stack(unsigned int cpu) { void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); @@ -98,6 +127,7 @@ static int map_irq_stack(unsigned int cpu) per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; return 0; } +#endif int irq_init_percpu_irqstack(unsigned int cpu) { -- GitLab From 117ed45485413b1977bfc638c32bf5b01d53c62b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:08 +0200 Subject: [PATCH 1277/1861] x86/irq/64: Remove stack overflow debug code All stack types on x86 64-bit have guard pages now. So there is no point in executing probabilistic overflow checks as the guard pages are a accurate and reliable overflow prevention. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160146.466354762@linutronix.de --- arch/x86/Kconfig | 2 +- arch/x86/kernel/irq_64.c | 56 ---------------------------------------- 2 files changed, 1 insertion(+), 57 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19c..fd06614b09a71 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -14,6 +14,7 @@ config X86_32 select ARCH_WANT_IPC_PARSE_VERSION select CLKSRC_I8253 select CLONE_BACKWARDS + select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION @@ -138,7 +139,6 @@ config X86 select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f107eb2021f60..6bf6517a05bba 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,64 +26,8 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; DECLARE_INIT_PER_CPU(irq_stack_backing_store); -int sysctl_panic_on_stackoverflow; - -/* - * Probabilistic stack overflow check: - * - * Regular device interrupts can enter on the following stacks: - * - * - User stack - * - * - Kernel task stack - * - * - Interrupt stack if a device driver reenables interrupts - * which should only happen in really old drivers. - * - * - Debug IST stack - * - * All other contexts are invalid. - */ -static inline void stack_overflow_check(struct pt_regs *regs) -{ -#ifdef CONFIG_DEBUG_STACKOVERFLOW -#define STACK_MARGIN 128 - u64 irq_stack_top, irq_stack_bottom, estack_top, estack_bottom; - u64 curbase = (u64)task_stack_page(current); - struct cea_exception_stacks *estacks; - - if (user_mode(regs)) - return; - - if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_MARGIN && - regs->sp <= curbase + THREAD_SIZE) - return; - - irq_stack_top = (u64)__this_cpu_read(hardirq_stack_ptr); - irq_stack_bottom = irq_stack_top - IRQ_STACK_SIZE + STACK_MARGIN; - if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) - return; - - estacks = __this_cpu_read(cea_exception_stacks); - estack_top = CEA_ESTACK_TOP(estacks, DB); - estack_bottom = CEA_ESTACK_BOT(estacks, DB) + STACK_MARGIN; - if (regs->sp >= estack_bottom && regs->sp <= estack_top) - return; - - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx, irq stack:%Lx-%Lx, exception stack: %Lx-%Lx, ip:%pF)\n", - current->comm, curbase, regs->sp, - irq_stack_bottom, irq_stack_top, - estack_bottom, estack_top, (void *)regs->ip); - - if (sysctl_panic_on_stackoverflow) - panic("low stack detected by irq handler - check messages\n"); -#endif -} - bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - stack_overflow_check(regs); - if (IS_ERR_OR_NULL(desc)) return false; -- GitLab From 3b9a907223d7f6b9d1dadea29436842ae9bcd76d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 3 Apr 2019 15:58:16 -0500 Subject: [PATCH 1278/1861] ipmi: fix sleep-in-atomic in free_user at cleanup SRCU user->release_barrier free_user() could be called in atomic context. This patch pushed the free operation off into a workqueue. 
Example: BUG: sleeping function called from invalid context at kernel/workqueue.c:2856 in_atomic(): 1, irqs_disabled(): 0, pid: 177, name: ksoftirqd/27 CPU: 27 PID: 177 Comm: ksoftirqd/27 Not tainted 4.19.25-3 #1 Hardware name: AIC 1S-HV26-08/MB-DPSB04-06, BIOS IVYBV060 10/21/2015 Call Trace: dump_stack+0x5c/0x7b ___might_sleep+0xec/0x110 __flush_work+0x48/0x1f0 ? try_to_del_timer_sync+0x4d/0x80 _cleanup_srcu_struct+0x104/0x140 free_user+0x18/0x30 [ipmi_msghandler] ipmi_free_recv_msg+0x3a/0x50 [ipmi_msghandler] deliver_response+0xbd/0xd0 [ipmi_msghandler] deliver_local_response+0xe/0x30 [ipmi_msghandler] handle_one_recv_msg+0x163/0xc80 [ipmi_msghandler] ? dequeue_entity+0xa0/0x960 handle_new_recv_msgs+0x15c/0x1f0 [ipmi_msghandler] tasklet_action_common.isra.22+0x103/0x120 __do_softirq+0xf8/0x2d7 run_ksoftirqd+0x26/0x50 smpboot_thread_fn+0x11d/0x1e0 kthread+0x103/0x140 ? sort_range+0x20/0x20 ? kthread_destroy_worker+0x40/0x40 ret_from_fork+0x1f/0x40 Fixes: 77f8269606bf ("ipmi: fix use-after-free of user->release_barrier.rda") Reported-by: Konstantin Khlebnikov Signed-off-by: Corey Minyard Cc: stable@vger.kernel.org # 5.0 Cc: Yang Yingliang --- drivers/char/ipmi/ipmi_msghandler.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index e8ba678347466..00bf4b17edbfa 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -214,6 +214,9 @@ struct ipmi_user { /* Does this interface receive IPMI events? */ bool gets_events; + + /* Free must run in process context for RCU cleanup. */ + struct work_struct remove_work; }; static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index) @@ -1157,6 +1160,15 @@ static int intf_err_seq(struct ipmi_smi *intf, return rv; } +static void free_user_work(struct work_struct *work) +{ + struct ipmi_user *user = container_of(work, struct ipmi_user, + remove_work); + + cleanup_srcu_struct(&user->release_barrier); + kfree(user); +} + int ipmi_create_user(unsigned int if_num, const struct ipmi_user_hndl *handler, void *handler_data, @@ -1200,6 +1212,8 @@ int ipmi_create_user(unsigned int if_num, goto out_kfree; found: + INIT_WORK(&new_user->remove_work, free_user_work); + rv = init_srcu_struct(&new_user->release_barrier); if (rv) goto out_kfree; @@ -1260,8 +1274,9 @@ EXPORT_SYMBOL(ipmi_get_smi_info); static void free_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); - cleanup_srcu_struct(&user->release_barrier); - kfree(user); + + /* SRCU cleanup must happen in task context. */ + schedule_work(&user->remove_work); } static void _ipmi_destroy_user(struct ipmi_user *user) -- GitLab From aa52660231410b13d237299e691c43e346e3a73f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 15:41:51 +0200 Subject: [PATCH 1279/1861] perf bpf: Return NULL when RB tree lookup fails in perf_env__find_bpf_prog_info() We currently don't return NULL in case we don't find the bpf_prog_info_node, fixing that. 
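The bug pattern is easy to miss: when the search loop terminates, node still points at the last entry visited, so falling out of the loop without clearing it hands back a bogus entry on a lookup miss. A minimal sketch of the corrected shape (generic names, not the perf code itself):

	#include <linux/rbtree.h>

	struct thing {
		struct rb_node rb_node;
		u32 id;
	};

	static struct thing *thing_lookup(struct rb_root *root, u32 id)
	{
		struct thing *node;
		struct rb_node *n = root->rb_node;

		while (n) {
			node = rb_entry(n, struct thing, rb_node);
			if (id < node->id)
				n = n->rb_left;
			else if (id > node->id)
				n = n->rb_right;
			else
				goto out;	/* exact match */
		}
		node = NULL;	/* miss: do not return the last visited entry */
	out:
		return node;
	}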
Signed-off-by: Jiri Olsa Acked-by: Song Liu Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: e4378f0cb90b ("perf bpf: Save bpf_prog_info in a rbtree in perf_env") Link: http://lkml.kernel.org/r/20190416134151.15282-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index c6351b557bb0a..34a363f2e71bc 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -57,9 +57,11 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, else if (prog_id > node->info_linear->info.id) n = n->rb_right; else - break; + goto out; } + node = NULL; +out: up_read(&env->bpf_progs.lock); return node; } -- GitLab From a93e0b2365e81e5a5b61f25e269b5dc73d242cba Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 16 Apr 2019 18:01:22 +0200 Subject: [PATCH 1280/1861] perf tools: Check maps for bpf programs As reported by Jiri Olsa in: "[BUG] perf: intel_pt won't display kernel function" https://lore.kernel.org/lkml/20190403143738.GB32001@krava Recent changes to support PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT broke the --kallsyms option. This is because they broke the __map__is_kmodule test. Fix this by adding a check for bpf programs, so that their maps are not mistaken for kernel modules. Signed-off-by: Song Liu Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yonghong Song Link: http://lkml.kernel.org/r/20190416160127.30203-8-jolsa@kernel.org Fixes: 76193a94522f ("perf, bpf: Introduce PERF_RECORD_KSYMBOL") Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 16 ++++++++++++++++ tools/perf/util/map.h | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index e32628cd20a7f..28d484ef74ae8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map) return kmap && kmap->name[0]; } +bool __map__is_bpf_prog(const struct map *map) +{ + const char *name; + + if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + return true; + + /* + * If PERF_RECORD_BPF_EVENT is not included, the dso will not have + * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can + * guess the type based on name. 
+ */ + name = map->dso->short_name; + return name && (strstr(name, "bpf_prog_") == name); +} + bool map__has_symbols(const struct map *map) { return dso__has_symbols(map->dso); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e20749f2c55d..dc93787c74f01 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -159,10 +159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); +bool __map__is_bpf_prog(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { - return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map); + return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) && + !__map__is_bpf_prog(map); } bool map__has_symbols(const struct map *map); -- GitLab From adc6257c4a6f23ff97dca8314fcd33828e2d8db5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 18:01:23 +0200 Subject: [PATCH 1281/1861] perf evlist: Fix side band thread draining Current perf_evlist__poll_thread() code could finish without draining the data. Add logic to make sure we don't finish before the data is drained. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Fixes: 657ee5531903 ("perf evlist: Introduce side band thread") Link: http://lkml.kernel.org/r/20190416160127.30203-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6689378ee577c..51ead577533fa 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg) { struct perf_evlist *evlist = arg; bool draining = false; - int i; + int i, done = 0; + + while (!done) { + bool got_data = false; - while (draining || !(evlist->thread.done)) { - if (draining) - draining = false; - else if (evlist->thread.done) + if (evlist->thread.done) draining = true; if (!draining) @@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg) pr_warning("cannot locate proper evsel for the side band event\n"); perf_mmap__consume(map); + got_data = true; } perf_mmap__read_done(map); } + + if (draining && !got_data) + break; } return NULL; } -- GitLab From b9abbdfa88024d52c8084d8f46ea4f161606c692 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Apr 2019 18:01:24 +0200 Subject: [PATCH 1282/1861] perf tools: Fix map reference counting By calling maps__insert() we assume we take 2 references on the map, which we release within the maps__remove call. However, if there's already a map with the same name, we currently don't bump the reference count and can crash, like: Program received signal SIGABRT, Aborted. 
0x00007ffff75e60f5 in raise () from /lib64/libc.so.6 (gdb) bt #0 0x00007ffff75e60f5 in raise () from /lib64/libc.so.6 #1 0x00007ffff75d0895 in abort () from /lib64/libc.so.6 #2 0x00007ffff75d0769 in __assert_fail_base.cold () from /lib64/libc.so.6 #3 0x00007ffff75de596 in __assert_fail () from /lib64/libc.so.6 #4 0x00000000004fc006 in refcount_sub_and_test (i=1, r=0x1224e88) at tools/include/linux/refcount.h:131 #5 refcount_dec_and_test (r=0x1224e88) at tools/include/linux/refcount.h:148 #6 map__put (map=0x1224df0) at util/map.c:299 #7 0x00000000004fdb95 in __maps__remove (map=0x1224df0, maps=0xb17d80) at util/map.c:953 #8 maps__remove (maps=0xb17d80, map=0x1224df0) at util/map.c:959 #9 0x00000000004f7d8a in map_groups__remove (map=, mg=) at util/map_groups.h:65 #10 machine__process_ksymbol_unregister (sample=, event=0x7ffff7279670, machine=) at util/machine.c:728 #11 machine__process_ksymbol (machine=, event=0x7ffff7279670, sample=) at util/machine.c:741 #12 0x00000000004fffbb in perf_session__deliver_event (session=0xb11390, event=0x7ffff7279670, tool=0x7fffffffc7b0, file_offset=13936) at util/session.c:1362 #13 0x00000000005039bb in do_flush (show_progress=false, oe=0xb17e80) at util/ordered-events.c:243 #14 __ordered_events__flush (oe=0xb17e80, how=OE_FLUSH__ROUND, timestamp=) at util/ordered-events.c:322 #15 0x00000000005005e4 in perf_session__process_user_event (session=session@entry=0xb11390, event=event@entry=0x7ffff72a4af8, ... Add the map to the list and take the reference even if we find a map with the same name. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190416160127.30203-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 28d484ef74ae8..ee71efb9db62e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -926,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map) rc = strcmp(m->dso->short_name, map->dso->short_name); if (rc < 0) p = &(*p)->rb_left; - else if (rc > 0) - p = &(*p)->rb_right; else - return; + p = &(*p)->rb_right; } rb_link_node(&map->rb_node_name, parent, p); rb_insert_color(&map->rb_node_name, &maps->names); -- GitLab From 2db7b1e0bd49d2b0e7d16949e167b1cfaf5c07cf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 17 Apr 2019 16:55:39 +0200 Subject: [PATCH 1283/1861] perf bpf: Return NULL when RB tree lookup fails in perf_env__find_btf() We don't return NULL when we don't find the btf_node, fix that. 
Signed-off-by: Jiri Olsa Reported-by: Song Liu Acked-by: Song Liu Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 3792cb2ff43b ("perf bpf: Save BTF in a rbtree in perf_env") Link: http://lkml.kernel.org/r/20190417145539.11669-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 34a363f2e71bc..9494f9dc61eca 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -111,10 +111,12 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - break; + goto out; } + node = NULL; +out: up_read(&env->bpf_progs.lock); return node; } -- GitLab From 74f464e97044da33b25aaed00213914b0edf1f2e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 17 Apr 2019 08:57:48 -0600 Subject: [PATCH 1284/1861] io_uring: fix CQ overflow condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a leftover from when the rings initially were not free flowing, and hence a test for tail + 1 == head would indicate full. Since we now let them wrap instead of mask them with the size, we need to check if they drift more than the ring size from each other. This fixes a case where we'd overwrite CQ ring entries, if the user failed to reap completions. Both cases would ultimately result in lost completions as the application violated the depth it asked for. The only difference is that before this fix we'd return invalid entries for the overflowed completions, instead of properly flagging it in the cq_ring->overflow variable. Reported-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b35300e4c9a79..f65f85d892174 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -338,7 +338,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) tail = ctx->cached_cq_tail; /* See comment at the top of the file */ smp_rmb(); - if (tail + 1 == READ_ONCE(ring->r.head)) + if (tail - READ_ONCE(ring->r.head) == ring->ring_entries) return NULL; ctx->cached_cq_tail++; -- GitLab From 98af8452945c55652de68536afdde3b520fec429 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:28 -0500 Subject: [PATCH 1285/1861] cpu/speculation: Add 'mitigations=' cmdline option Keeping track of the number of mitigations for all the CPU speculation bugs has become overwhelming for many users. It's getting more and more complicated to decide which mitigations are needed for a given architecture. Complicating matters is the fact that each arch tends to have its own custom way to mitigate the same vulnerability. Most users fall into a few basic categories: a) they want all mitigations off; b) they want all reasonable mitigations on, with SMT enabled even if it's vulnerable; or c) they want all reasonable mitigations on, with SMT disabled if vulnerable. Define a set of curated, arch-independent options, each of which is an aggregation of existing options: - mitigations=off: Disable all mitigations. - mitigations=auto: [default] Enable all the default mitigations, but leave SMT enabled, even if it's vulnerable. - mitigations=auto,nosmt: Enable all the default mitigations, disabling SMT if needed by a mitigation. Currently, these options are placeholders which don't actually do anything. They will be fleshed out in upcoming patches. 
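For illustration, arch code is expected to consume the new helpers roughly as follows (a hypothetical call site: arch_select_mitigation, mode, the MODE_* values and needs_smt_off are stand-ins; only cpu_mitigations_off(), cpu_mitigations_auto_nosmt() and cpu_smt_disable() are real interfaces, and the actual x86 wiring lands in the next patch):

	static void __init arch_select_mitigation(void)
	{
		if (cpu_mitigations_off()) {
			/* mitigations=off: leave this vulnerability unmitigated */
			mode = MODE_NONE;
			return;
		}

		/* mitigations=auto and auto,nosmt: enable the default mitigation */
		mode = MODE_FULL;

		/* auto,nosmt additionally disables SMT where the bug requires it */
		if (cpu_mitigations_auto_nosmt() && needs_smt_off)
			cpu_smt_disable(false);
	}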
Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/b07a8ef9b7c5055c3a4637c87d07c296d5016fe0.1555085500.git.jpoimboe@redhat.com --- .../admin-guide/kernel-parameters.txt | 24 +++++++++++++++++++ include/linux/cpu.h | 24 +++++++++++++++++++ kernel/cpu.c | 15 ++++++++++++ 3 files changed, 63 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 858b6c0b9a15a..720ffa9c4e04b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2513,6 +2513,30 @@ in the "bleeding edge" mini2440 support kernel at http://repo.or.cz/w/linux-2.6/mini2440.git + mitigations= + Control optional mitigations for CPU vulnerabilities. + This is a set of curated, arch-independent options, each + of which is an aggregation of existing arch-specific + options. + + off + Disable all optional CPU mitigations. This + improves system performance, but it may also + expose users to several CPU vulnerabilities. + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT + enabled, even if it's vulnerable. This is for + users who don't want to be surprised by SMT + getting disabled across kernel upgrades, or who + have other ways of avoiding SMT-based attacks. + This is the default behavior. + + auto,nosmt + Mitigate all CPU vulnerabilities, disabling SMT + if needed. This is for users who always want to + be fully mitigated, even if it means losing SMT. + mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this parameter allows control of the logging verbosity for diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297a..2d9c6f4b78f58 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -187,4 +187,28 @@ static inline void cpu_smt_disable(bool force) { } static inline void cpu_smt_check_topology(void) { } #endif +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. 
+ */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +extern enum cpu_mitigations cpu_mitigations; + +/* mitigations=off */ +static inline bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} + +/* mitigations=auto,nosmt */ +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} + #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index d1c6d152da890..e70a90634b41c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2279,3 +2279,18 @@ void __init boot_cpu_hotplug_init(void) #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } + +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; + +static int __init mitigations_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + cpu_mitigations = CPU_MITIGATIONS_OFF; + else if (!strcmp(arg, "auto")) + cpu_mitigations = CPU_MITIGATIONS_AUTO; + else if (!strcmp(arg, "auto,nosmt")) + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + + return 0; +} +early_param("mitigations", mitigations_parse_cmdline); -- GitLab From d68be4c4d31295ff6ae34a8ddfaa4c1a8ff42812 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:29 -0500 Subject: [PATCH 1286/1861] x86/speculation: Support 'mitigations=' cmdline option Configure x86 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, Speculative Store Bypass, and L1TF. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/6616d0ae169308516cfdf5216bedd169f8a8291b.1555085500.git.jpoimboe@redhat.com --- Documentation/admin-guide/kernel-parameters.txt | 16 +++++++++++----- arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- arch/x86/mm/pti.c | 4 +++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 720ffa9c4e04b..779ddeb2929cc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2514,15 +2514,20 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - Control optional mitigations for CPU vulnerabilities. - This is a set of curated, arch-independent options, each - of which is an aggregation of existing arch-specific - options. + [X86] Control optional mitigations for CPU + vulnerabilities. This is a set of curated, + arch-independent options, each of which is an + aggregation of existing arch-specific options. off Disable all optional CPU mitigations. This improves system performance, but it may also expose users to several CPU vulnerabilities. 
+ Equivalent to: nopti [X86] + nospectre_v2 [X86] + spectre_v2_user=off [X86] + spec_store_bypass_disable=off [X86] + l1tf=off [X86] auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2530,12 +2535,13 @@ users who don't want to be surprised by SMT getting disabled across kernel upgrades, or who have other ways of avoiding SMT-based attacks. - This is the default behavior. + Equivalent to: (default behavior) auto,nosmt Mitigate all CPU vulnerabilities, disabling SMT if needed. This is for users who always want to be fully mitigated, even if it means losing SMT. + Equivalent to: l1tf=flush,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01874d54f4fd9..435c078c29483 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -440,7 +440,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || + cpu_mitigations_off()) return SPECTRE_V2_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); @@ -672,7 +673,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") || + cpu_mitigations_off()) { return SPEC_STORE_BYPASS_CMD_NONE; } else { ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", @@ -996,6 +998,11 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + if (cpu_mitigations_off()) + l1tf_mitigation = L1TF_MITIGATION_OFF; + else if (cpu_mitigations_auto_nosmt()) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + override_cache_bits(&boot_cpu_data); switch (l1tf_mitigation) { diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 4fee5c3003ed7..5890f09bfc195 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void) } } - if (cmdline_find_option_bool(boot_command_line, "nopti")) { + if (cmdline_find_option_bool(boot_command_line, "nopti") || + cpu_mitigations_off()) { pti_mode = PTI_FORCE_OFF; pti_print_if_insecure("disabled on command line."); return; -- GitLab From 782e69efb3dfed6e8360bc612e8c7827a901a8f9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:30 -0500 Subject: [PATCH 1287/1861] powerpc/speculation: Support 'mitigations=' cmdline option Configure powerpc CPU runtime speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v1, Spectre v2, and Speculative Store Bypass. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . 
Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/245a606e1a42a558a310220312d9b6adb9159df6.1555085500.git.jpoimboe@redhat.com --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++---- arch/powerpc/kernel/security.c | 6 +++--- arch/powerpc/kernel/setup_64.c | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 779ddeb2929cc..ac7150a25a7a1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2514,7 +2514,7 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86] Control optional mitigations for CPU + [X86,PPC] Control optional mitigations for CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2523,10 +2523,11 @@ Disable all optional CPU mitigations. This improves system performance, but it may also expose users to several CPU vulnerabilities. - Equivalent to: nopti [X86] - nospectre_v2 [X86] + Equivalent to: nopti [X86,PPC] + nospectre_v1 [PPC] + nospectre_v2 [X86,PPC] spectre_v2_user=off [X86] - spec_store_bypass_disable=off [X86] + spec_store_bypass_disable=off [X86,PPC] l1tf=off [X86] auto (default) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 9b8631533e02a..cdf3e73000e94 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -57,7 +57,7 @@ void setup_barrier_nospec(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); - if (!no_nospec) + if (!no_nospec && !cpu_mitigations_off()) enable_barrier_nospec(enable); } @@ -116,7 +116,7 @@ static int __init handle_nospectre_v2(char *p) early_param("nospectre_v2", handle_nospectre_v2); void setup_spectre_v2(void) { - if (no_spectrev2) + if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); else btb_flush_enabled = true; @@ -307,7 +307,7 @@ void setup_stf_barrier(void) stf_enabled_flush_types = type; - if (!no_stf_barrier) + if (!no_stf_barrier && !cpu_mitigations_off()) stf_barrier_enable(enable); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 236c1151a3a77..c7ec27ba8926a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -958,7 +958,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush) + if (!no_rfi_flush && !cpu_mitigations_off()) rfi_flush_enable(enable); } -- GitLab From 0336e04a6520bdaefdb0769d2a70084fa52e81ed Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:31 -0500 Subject: [PATCH 1288/1861] s390/speculation: Support 'mitigations=' cmdline option Configure s390 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Spectre v1 and Spectre v2. The default behavior is unchanged. 
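Whatever the architecture, the net effect of a given 'mitigations=' setting can be inspected after boot through the existing sysfs interface, for example:

  grep . /sys/devices/system/cpu/vulnerabilities/*

(That interface is not part of this series; it is mentioned here only as a convenient way to list the per-vulnerability state.)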
Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/e4a161805458a5ec88812aac0307ae3908a030fc.1555085500.git.jpoimboe@redhat.com --- Documentation/admin-guide/kernel-parameters.txt | 5 +++-- arch/s390/kernel/nospec-branch.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ac7150a25a7a1..1ae93872b79fd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2514,7 +2514,7 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86,PPC] Control optional mitigations for CPU + [X86,PPC,S390] Control optional mitigations for CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2525,7 +2525,8 @@ expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] nospectre_v1 [PPC] - nospectre_v2 [X86,PPC] + nobp=0 [S390] + nospectre_v2 [X86,PPC,S390] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] l1tf=off [X86] diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index bdddaae965598..649135cbedd5c 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include static int __init nobp_setup_early(char *str) @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (test_facility(156)) { + if (test_facility(156) || cpu_mitigations_off()) { /* * The machine supports etokens. * Disable expolines and disable nobp. -- GitLab From a7b1a4839ff979b4dd4fb6c1ccd31af11de9ca87 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Apr 2019 11:54:13 -0400 Subject: [PATCH 1289/1861] SUNRPC: Ignore queue transmission errors on successful transmission If a request transmission fails due to write space or slot unavailability errors, but the queued task then gets transmitted before it has time to process the error in call_transmit_status() or call_bc_transmit_status(), we need to suppress the transmission error code to prevent it from leaking out of the RPC layer. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- net/sunrpc/clnt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1d0395ef62c95..8ff11dc98d7f9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2081,8 +2081,8 @@ call_transmit_status(struct rpc_task *task) * test first. 
*/ if (rpc_task_transmitted(task)) { - if (task->tk_status == 0) - xprt_request_wait_receive(task); + task->tk_status = 0; + xprt_request_wait_receive(task); return; } @@ -2167,6 +2167,9 @@ call_bc_transmit_status(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; + if (rpc_task_transmitted(task)) + task->tk_status = 0; + dprint_status(task); switch (task->tk_status) { -- GitLab From 19fad20d15a6494f47f85d869f00b11343ee5c78 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Tue, 16 Apr 2019 09:47:24 +0800 Subject: [PATCH 1290/1861] ipv4: set the tcp_min_rtt_wlen range from 0 to one day There is a UBSAN report as below: UBSAN: Undefined behaviour in net/ipv4/tcp_input.c:2877:56 signed integer overflow: 2147483647 * 1000 cannot be represented in type 'int' CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.1.0-rc4-00058-g582549e #1 Call Trace: dump_stack+0x8c/0xba ubsan_epilogue+0x11/0x60 handle_overflow+0x12d/0x170 ? ttwu_do_wakeup+0x21/0x320 __ubsan_handle_mul_overflow+0x12/0x20 tcp_ack_update_rtt+0x76c/0x780 tcp_clean_rtx_queue+0x499/0x14d0 tcp_ack+0x69e/0x1240 ? __wake_up_sync_key+0x2c/0x50 ? update_group_capacity+0x50/0x680 tcp_rcv_established+0x4e2/0xe10 tcp_v4_do_rcv+0x22b/0x420 tcp_v4_rcv+0xfe8/0x1190 ip_protocol_deliver_rcu+0x36/0x180 ip_local_deliver+0x15b/0x1a0 ip_rcv+0xac/0xd0 __netif_receive_skb_one_core+0x7f/0xb0 __netif_receive_skb+0x33/0xc0 netif_receive_skb_internal+0x84/0x1c0 napi_gro_receive+0x2a0/0x300 receive_buf+0x3d4/0x2350 ? detach_buf_split+0x159/0x390 virtnet_poll+0x198/0x840 ? reweight_entity+0x243/0x4b0 net_rx_action+0x25c/0x770 __do_softirq+0x19b/0x66d irq_exit+0x1eb/0x230 do_IRQ+0x7a/0x150 common_interrupt+0xf/0xf It can be reproduced by: echo 2147483647 > /proc/sys/net/ipv4/tcp_min_rtt_wlen Fixes: f672258391b42 ("tcp: track min RTT using windowed min-filter") Signed-off-by: ZhangXiaoxu Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 1 + net/ipv4/sysctl_net_ipv4.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index acdfb5d2bcaa4..e2142fe40cdad 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER minimum RTT when it is moved to a longer path (e.g., due to traffic engineering). A longer window makes the filter more resistant to RTT inflations such as transient congestion. The unit is seconds. 
+ Possible values: 0 - 86400 (1 day) Default: 300 tcp_moderate_rcvbuf - BOOLEAN diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ba0fc4b184656..eeb4041fa5f90 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; +static int one_day_secs = 24 * 3600; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_day_secs }, { .procname = "tcp_autocorking", -- GitLab From 7390619ab9ea9fd0ba9f4c3e4749ee20262cba7d Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 17 Apr 2019 19:08:48 +0800 Subject: [PATCH 1291/1861] x86/resctrl: Move per RDT domain initialization to a separate function Carve out per rdt_domain initialization code from rdtgroup_init_alloc() into a separate function. No functional change, make the code more readable and save us at least two indentation levels. Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Reinette Chatre Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1555499329-1170-2-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 131 ++++++++++++++----------- 1 file changed, 72 insertions(+), 59 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 85212a32b54df..36ace51ee705f 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2516,28 +2516,86 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) bitmap_clear(val, zero_bit, cbm_len - zero_bit); } +/* + * Initialize cache resources per RDT domain + * + * Set the RDT domain up to start off with all usable allocations. That is, + * all shareable and unused bits. All-zero CBM is invalid. + */ +static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, + u32 closid) +{ + struct rdt_resource *r_cdp = NULL; + struct rdt_domain *d_cdp = NULL; + u32 used_b = 0, unused_b = 0; + unsigned long tmp_cbm; + enum rdtgrp_mode mode; + u32 peer_ctl, *ctrl; + int i; + + rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); + d->have_new_ctrl = false; + d->new_ctrl = r->cache.shareable_bits; + used_b = r->cache.shareable_bits; + ctrl = d->ctrl_val; + for (i = 0; i < closids_supported(); i++, ctrl++) { + if (closid_allocated(i) && i != closid) { + mode = rdtgroup_mode_by_closid(i); + if (mode == RDT_MODE_PSEUDO_LOCKSETUP) + break; + /* + * If CDP is active include peer domain's + * usage to ensure there is no overlap + * with an exclusive group. + */ + if (d_cdp) + peer_ctl = d_cdp->ctrl_val[i]; + else + peer_ctl = 0; + used_b |= *ctrl | peer_ctl; + if (mode == RDT_MODE_SHAREABLE) + d->new_ctrl |= *ctrl | peer_ctl; + } + } + if (d->plr && d->plr->cbm > 0) + used_b |= d->plr->cbm; + unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); + unused_b &= BIT_MASK(r->cache.cbm_len) - 1; + d->new_ctrl |= unused_b; + /* + * Force the initial CBM to be valid, user can + * modify the CBM based on system availability. 
+ */ + cbm_ensure_valid(&d->new_ctrl, r); + /* + * Assign the u32 CBM to an unsigned long to ensure that + * bitmap_weight() does not access out-of-bound memory. + */ + tmp_cbm = d->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { + rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); + return -ENOSPC; + } + d->have_new_ctrl = true; + + return 0; +} + /** * rdtgroup_init_alloc - Initialize the new RDT group's allocations * * A new RDT group is being created on an allocation capable (CAT) * supporting system. Set this group up to start off with all usable - * allocations. That is, all shareable and unused bits. + * allocations. * - * All-zero CBM is invalid. If there are no more shareable bits available - * on any domain then the entire allocation will fail. + * If there are no more shareable bits available on any domain then + * the entire allocation will fail. */ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) { - struct rdt_resource *r_cdp = NULL; - struct rdt_domain *d_cdp = NULL; - u32 used_b = 0, unused_b = 0; - u32 closid = rdtgrp->closid; struct rdt_resource *r; - unsigned long tmp_cbm; - enum rdtgrp_mode mode; struct rdt_domain *d; - u32 peer_ctl, *ctrl; - int i, ret; + int ret; for_each_alloc_enabled_rdt_resource(r) { /* @@ -2547,54 +2605,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) if (r->rid == RDT_RESOURCE_MBA) continue; list_for_each_entry(d, &r->domains, list) { - rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp); - d->have_new_ctrl = false; - d->new_ctrl = r->cache.shareable_bits; - used_b = r->cache.shareable_bits; - ctrl = d->ctrl_val; - for (i = 0; i < closids_supported(); i++, ctrl++) { - if (closid_allocated(i) && i != closid) { - mode = rdtgroup_mode_by_closid(i); - if (mode == RDT_MODE_PSEUDO_LOCKSETUP) - break; - /* - * If CDP is active include peer - * domain's usage to ensure there - * is no overlap with an exclusive - * group. - */ - if (d_cdp) - peer_ctl = d_cdp->ctrl_val[i]; - else - peer_ctl = 0; - used_b |= *ctrl | peer_ctl; - if (mode == RDT_MODE_SHAREABLE) - d->new_ctrl |= *ctrl | peer_ctl; - } - } - if (d->plr && d->plr->cbm > 0) - used_b |= d->plr->cbm; - unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1); - unused_b &= BIT_MASK(r->cache.cbm_len) - 1; - d->new_ctrl |= unused_b; - /* - * Force the initial CBM to be valid, user can - * modify the CBM based on system availability. - */ - cbm_ensure_valid(&d->new_ctrl, r); - /* - * Assign the u32 CBM to an unsigned long to ensure - * that bitmap_weight() does not access out-of-bound - * memory. - */ - tmp_cbm = d->new_ctrl; - if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < - r->cache.min_cbm_bits) { - rdt_last_cmd_printf("No space on %s:%d\n", - r->name, d->id); - return -ENOSPC; - } - d->have_new_ctrl = true; + ret = __init_one_rdt_domain(d, r, rdtgrp->closid); + if (ret < 0) + return ret; } } -- GitLab From 47820e73f5b3a1fdb8ebd1219191edc96e0c85c1 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Wed, 17 Apr 2019 19:08:49 +0800 Subject: [PATCH 1292/1861] x86/resctrl: Initialize a new resource group with default MBA values Currently, when a new resource group is created, the allocation values of the MBA resource are not initialized and remain meaningless data. 
For example: mkdir /sys/fs/resctrl/p1 cat /sys/fs/resctrl/p1/schemata MB:0=100;1=100 echo "MB:0=10;1=20" > /sys/fs/resctrl/p1/schemata cat /sys/fs/resctrl/p1/schemata MB:0= 10;1= 20 rmdir /sys/fs/resctrl/p1 mkdir /sys/fs/resctrl/p2 cat /sys/fs/resctrl/p2/schemata MB:0= 10;1= 20 Therefore, when the new group is created, it is reasonable to initialize MBA resource with default values. Initialize the MBA resource and cache resources in separate functions. [ bp: Add newlines between code blocks for better readability. ] Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: pei.p.jia@intel.com Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/1555499329-1170-3-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 +- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 52 ++++++++++++++--------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 2dbd990a2eb78..89320c0396b1f 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -342,10 +342,10 @@ int update_domains(struct rdt_resource *r, int closid) if (cpumask_empty(cpu_mask) || mba_sc) goto done; cpu = get_cpu(); - /* Update CBM on this cpu if it's in cpu_mask. */ + /* Update resource control msr on this CPU if it's in cpu_mask. */ if (cpumask_test_cpu(cpu, cpu_mask)) rdt_ctrl_update(&msr_param); - /* Update CBM on other cpus. */ + /* Update resource control msr on other CPUs. */ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); put_cpu(); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 36ace51ee705f..333c177a2471e 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2581,8 +2581,8 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, return 0; } -/** - * rdtgroup_init_alloc - Initialize the new RDT group's allocations +/* + * Initialize cache resources with default values. * * A new RDT group is being created on an allocation capable (CAT) * supporting system. Set this group up to start off with all usable @@ -2591,38 +2591,52 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * If there are no more shareable bits available on any domain then * the entire allocation will fail. */ +static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid) +{ + struct rdt_domain *d; + int ret; + + list_for_each_entry(d, &r->domains, list) { + ret = __init_one_rdt_domain(d, r, closid); + if (ret < 0) + return ret; + } + + return 0; +} + +/* Initialize MBA resource with default values. */ +static void rdtgroup_init_mba(struct rdt_resource *r) +{ + struct rdt_domain *d; + + list_for_each_entry(d, &r->domains, list) { + d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl; + d->have_new_ctrl = true; + } +} + +/* Initialize the RDT group's allocations. 
*/ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) { struct rdt_resource *r; - struct rdt_domain *d; int ret; for_each_alloc_enabled_rdt_resource(r) { - /* - * Only initialize default allocations for CBM cache - * resources - */ - if (r->rid == RDT_RESOURCE_MBA) - continue; - list_for_each_entry(d, &r->domains, list) { - ret = __init_one_rdt_domain(d, r, rdtgrp->closid); + if (r->rid == RDT_RESOURCE_MBA) { + rdtgroup_init_mba(r); + } else { + ret = rdtgroup_init_cat(r, rdtgrp->closid); if (ret < 0) return ret; } - } - for_each_alloc_enabled_rdt_resource(r) { - /* - * Only initialize default allocations for CBM cache - * resources - */ - if (r->rid == RDT_RESOURCE_MBA) - continue; ret = update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; } + } rdtgrp->mode = RDT_MODE_SHAREABLE; -- GitLab From 7249c8ea227a582c14f63e9e8853eb7369122f10 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 10 Apr 2019 10:59:45 +0300 Subject: [PATCH 1293/1861] IB/mlx5: Fix scatter to CQE in DCT QP creation When scatter to CQE is enabled on a DCT QP it corrupts the mailbox command since it tries to treat it as a QP create mailbox command instead of a DCT create command. The corrupted mailbox command causes userspace to malfunction as the device doesn't create the QP as expected. A new mlx5 capability is exposed to user-space which ensures that it will not enable the feature on DCT without this fix in the kernel. Fixes: 5d6ff1babe78 ("IB/mlx5: Support scatter to CQE for DC transport type") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/qp.c | 11 +++++++---- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 531ff20b32ade..241d9ead79eb1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, qp_packet_based)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; + + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } if (field_avail(typeof(resp), sw_parsing_caps, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cd006da1daef..8870c350fda0b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - if (rcqe_sz == 128) { - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + if (init_attr->qp_type == MLX5_IB_QPT_DCT) { + if (rcqe_sz == 128) + MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + return; } - if (init_attr->qp_type != MLX5_IB_QPT_DCT) - MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); + MLX5_SET(qpc, qpc, cs_res, + rcqe_sz == 128 ?
MLX5_RES_SCAT_DATA64_CQE : + MLX5_RES_SCAT_DATA32_CQE); } static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 87b3198f4b5d7..f4d4010b7e3e5 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, + MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, }; enum mlx5_ib_tunnel_offloads { -- GitLab From f87db4dbd52f2f8a170a2b51cb0926221ca7c9e2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 17 Apr 2019 09:49:39 +0800 Subject: [PATCH 1294/1861] net: stmmac: Use bfsize1 in ndesc_init_rx_desc gcc warn this: drivers/net/ethernet/stmicro/stmmac/norm_desc.c: In function ndesc_init_rx_desc: drivers/net/ethernet/stmicro/stmmac/norm_desc.c:138:6: warning: variable 'bfsize1' set but not used [-Wunused-but-set-variable] Like enh_desc_init_rx_desc, we should use bfsize1 in ndesc_init_rx_desc to calculate 'p->des1' Fixes: 583e63614149 ("net: stmmac: use correct DMA buffer size in the RX descriptor") Signed-off-by: YueHaibing Reviewed-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index b7dd4e3c760d8..6d690678c20e1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -140,7 +140,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, p->des0 |= cpu_to_le32(RDES0_OWN); bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); - p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); -- GitLab From d003d772e64df08af04ee63609d47169ee82ae0e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Apr 2019 14:15:00 +0100 Subject: [PATCH 1295/1861] nfp: abm: fix spelling mistake "offseting" -> "offsetting" There are a couple of spelling mistakes in NL_SET_ERR_MSG_MOD error messages. Fix these. Signed-off-by: Colin Ian King Acked-by: Jakub Kicinski Reviewed-by: Mukesh Ojha Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/abm/cls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9852080cf4548..ff39130856652 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -39,7 +39,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode, } if (knode->sel->off || knode->sel->offshift || knode->sel->offmask || knode->sel->offoff || knode->fshift) { - NL_SET_ERR_MSG_MOD(extack, "variable offseting not supported"); + NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported"); return false; } if (knode->sel->hoff || knode->sel->hmask) { @@ -78,7 +78,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode, k = &knode->sel->keys[0]; if (k->offmask) { - NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offseting not supported"); + NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported"); return false; } if (k->off) { -- GitLab From 27b141fc234a3670d21bd742c35d7205d03cbb3a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Apr 2019 18:29:13 +0200 Subject: [PATCH 1296/1861] s390: ctcm: fix ctcm_new_device error return code clang points out that the return code from this function is undefined for one of the error paths: ../drivers/s390/net/ctcm_main.c:1595:7: warning: variable 'result' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1638:9: note: uninitialized use occurs here return result; ^~~~~~ ../drivers/s390/net/ctcm_main.c:1595:3: note: remove the 'if' if its condition is always false if (priv->channel[direction] == NULL) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../drivers/s390/net/ctcm_main.c:1539:12: note: initialize the variable 'result' to silence this warning int result; ^ Make it return -ENODEV here, as in the related failure cases. gcc has a known bug in underreporting some of these warnings when it has already eliminated the assignment of the return code based on some earlier optimization step. Reviewed-by: Nathan Chancellor Signed-off-by: Arnd Bergmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/ctcm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 7617d21cb2960..f63c5c871d3dd 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev) if (priv->channel[direction] == NULL) { if (direction == CTCM_WRITE) channel_free(priv->channel[CTCM_READ]); + result = -ENODEV; goto out_dev; } priv->channel[direction]->netdev = dev; -- GitLab From 3c454f47e67bf5a65dc892cd50221a7de695870f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 3 Apr 2019 17:30:12 +0900 Subject: [PATCH 1297/1861] x86/build/vdso: Add FORCE to the build rule of %.so $(call if_changed,...) must have FORCE as a prerequisite. 
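For background: if_changed re-runs its command when a prerequisite is newer or when the command line saved in the corresponding .cmd file differs, but it can only make that decision if make evaluates the recipe at all, which is what the empty FORCE target guarantees. The conventional kbuild shape is roughly:

  targets += foo.so
  $(obj)/foo.so: $(obj)/foo.so.dbg FORCE
          $(call if_changed,objcopy)

(foo.so is a placeholder name; the pattern rule fixed below follows the same shape.)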
Signed-off-by: Masahiro Yamada Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1554280212-10578-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 5bfe2243a08f8..42fe42e82bafa 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -116,7 +116,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE targets += vdsox32.lds $(vobjx32s-y) $(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg +$(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE -- GitLab From 5b2ad270529ff8abfd8a324ceece1ae1704875a0 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 8 Apr 2019 11:19:54 +0200 Subject: [PATCH 1298/1861] s390/qdio: limit direct access to first_to_check cursor Refactor all the low-level helpers to take the first_to_check cursor as parameter, rather than accessing it directly. Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 56 +++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 0ccd3b30af78e..045f2aad0b3cf 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -415,7 +415,8 @@ static inline void account_sbals(struct qdio_q *q, unsigned int count) q->q_stats.nr_sbals[pos]++; } -static void process_buffer_error(struct qdio_q *q, int count) +static void process_buffer_error(struct qdio_q *q, unsigned int start, + int count) { unsigned char state = (q->is_input_q) ? SLSB_P_INPUT_NOT_INIT : SLSB_P_OUTPUT_NOT_INIT; @@ -424,29 +425,29 @@ static void process_buffer_error(struct qdio_q *q, int count) /* special handling for no target buffer empty */ if (queue_type(q) == QDIO_IQDIO_QFMT && !q->is_input_q && - q->sbal[q->first_to_check]->element[15].sflags == 0x10) { + q->sbal[start]->element[15].sflags == 0x10) { qperf_inc(q, target_full); - DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x", - q->first_to_check); + DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x", start); goto set; } DBF_ERROR("%4x BUF ERROR", SCH_NO(q)); DBF_ERROR((q->is_input_q) ? "IN:%2d" : "OUT:%2d", q->nr); - DBF_ERROR("FTC:%3d C:%3d", q->first_to_check, count); + DBF_ERROR("FTC:%3d C:%3d", start, count); DBF_ERROR("F14:%2x F15:%2x", - q->sbal[q->first_to_check]->element[14].sflags, - q->sbal[q->first_to_check]->element[15].sflags); + q->sbal[start]->element[14].sflags, + q->sbal[start]->element[15].sflags); set: /* * Interrupts may be avoided as long as the error is present * so change the buffer state immediately to avoid starvation. 
*/ - set_buf_states(q, q->first_to_check, state, count); + set_buf_states(q, start, state, count); } -static inline void inbound_primed(struct qdio_q *q, int count) +static inline void inbound_primed(struct qdio_q *q, unsigned int start, + int count) { int new; @@ -457,7 +458,7 @@ static inline void inbound_primed(struct qdio_q *q, int count) if (!q->u.in.polling) { q->u.in.polling = 1; q->u.in.ack_count = count; - q->u.in.ack_start = q->first_to_check; + q->u.in.ack_start = start; return; } @@ -465,7 +466,7 @@ static inline void inbound_primed(struct qdio_q *q, int count) set_buf_states(q, q->u.in.ack_start, SLSB_P_INPUT_NOT_INIT, q->u.in.ack_count); q->u.in.ack_count = count; - q->u.in.ack_start = q->first_to_check; + q->u.in.ack_start = start; return; } @@ -473,7 +474,7 @@ static inline void inbound_primed(struct qdio_q *q, int count) * ACK the newest buffer. The ACK will be removed in qdio_stop_polling * or by the next inbound run. */ - new = add_buf(q->first_to_check, count - 1); + new = add_buf(start, count - 1); if (q->u.in.polling) { /* reset the previous ACK but first set the new one */ set_buf_state(q, new, SLSB_P_INPUT_ACK); @@ -488,11 +489,12 @@ static inline void inbound_primed(struct qdio_q *q, int count) if (!count) return; /* need to change ALL buffers to get more interrupts */ - set_buf_states(q, q->first_to_check, SLSB_P_INPUT_NOT_INIT, count); + set_buf_states(q, start, SLSB_P_INPUT_NOT_INIT, count); } static int get_inbound_buffer_frontier(struct qdio_q *q) { + unsigned int start = q->first_to_check; unsigned char state = 0; int count; @@ -510,22 +512,22 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) * No siga sync here, as a PCI or we after a thin interrupt * already sync'ed the queues. */ - count = get_buf_states(q, q->first_to_check, &state, count, 1, 0); + count = get_buf_states(q, start, &state, count, 1, 0); if (!count) return 0; switch (state) { case SLSB_P_INPUT_PRIMED: - inbound_primed(q, count); - q->first_to_check = add_buf(q->first_to_check, count); + inbound_primed(q, start, count); + q->first_to_check = add_buf(start, count); if (atomic_sub_return(count, &q->nr_buf_used) == 0) qperf_inc(q, inbound_queue_full); if (q->irq_ptr->perf_stat_enabled) account_sbals(q, count); return count; case SLSB_P_INPUT_ERROR: - process_buffer_error(q, count); - q->first_to_check = add_buf(q->first_to_check, count); + process_buffer_error(q, start, count); + q->first_to_check = add_buf(start, count); if (atomic_sub_return(count, &q->nr_buf_used) == 0) qperf_inc(q, inbound_queue_full); if (q->irq_ptr->perf_stat_enabled) @@ -537,7 +539,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) if (q->irq_ptr->perf_stat_enabled) q->q_stats.nr_sbal_nop++; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in nop:%1d %#02x", - q->nr, q->first_to_check); + q->nr, start); return 0; default: WARN_ON_ONCE(1); @@ -559,6 +561,7 @@ static int qdio_inbound_q_moved(struct qdio_q *q) static inline int qdio_inbound_q_done(struct qdio_q *q) { + unsigned int start = q->first_to_check; unsigned char state = 0; if (!atomic_read(&q->nr_buf_used)) @@ -566,7 +569,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q) if (need_siga_sync(q)) qdio_siga_sync_q(q); - get_buf_state(q, q->first_to_check, &state, 0); + get_buf_state(q, start, &state, 0); if (state == SLSB_P_INPUT_PRIMED || state == SLSB_P_INPUT_ERROR) /* more work coming */ @@ -584,8 +587,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q) * has (probably) not moved (see qdio_inbound_processing). 
*/ if (get_tod_clock_fast() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) { - DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%02x", - q->first_to_check); + DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%02x", start); return 1; } else return 0; @@ -706,6 +708,7 @@ void qdio_inbound_processing(unsigned long data) static int get_outbound_buffer_frontier(struct qdio_q *q) { + unsigned int start = q->first_to_check; unsigned char state = 0; int count; @@ -726,8 +729,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) if (!count) return 0; - count = get_buf_states(q, q->first_to_check, &state, count, 0, - q->u.out.use_cq); + count = get_buf_states(q, start, &state, count, 0, q->u.out.use_cq); if (!count) return 0; @@ -738,13 +740,13 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) "out empty:%1d %02x", q->nr, count); atomic_sub(count, &q->nr_buf_used); - q->first_to_check = add_buf(q->first_to_check, count); + q->first_to_check = add_buf(start, count); if (q->irq_ptr->perf_stat_enabled) account_sbals(q, count); return count; case SLSB_P_OUTPUT_ERROR: - process_buffer_error(q, count); - q->first_to_check = add_buf(q->first_to_check, count); + process_buffer_error(q, start, count); + q->first_to_check = add_buf(start, count); atomic_sub(count, &q->nr_buf_used); if (q->irq_ptr->perf_stat_enabled) account_sbals_error(q, count); -- GitLab From 6bcf74e2d15c8339f0e4a0f0613638873098e85f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 8 Apr 2019 13:32:12 +0200 Subject: [PATCH 1299/1861] s390/qdio: consolidate index tracking for queue scan qdio.ko offers a small number of high-level functions to drive the scanning of a QDIO queue for ready-to-process SBALs: qdio_get_next_buffers(), __[ti]qdio_inbound_processing() and __qdio_outbound_processing(). Let each of those functions maintain the 'start' index for their current scan, and pass it to lower-level helpers as needed. This improves the code's overall layering, and allows us to eliminate the additional first_to_kick cursor with a follow-on patch. 
Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 55 ++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 045f2aad0b3cf..cfce255521ac4 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -492,9 +492,8 @@ static inline void inbound_primed(struct qdio_q *q, unsigned int start, set_buf_states(q, start, SLSB_P_INPUT_NOT_INIT, count); } -static int get_inbound_buffer_frontier(struct qdio_q *q) +static int get_inbound_buffer_frontier(struct qdio_q *q, unsigned int start) { - unsigned int start = q->first_to_check; unsigned char state = 0; int count; @@ -519,7 +518,6 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) switch (state) { case SLSB_P_INPUT_PRIMED: inbound_primed(q, start, count); - q->first_to_check = add_buf(start, count); if (atomic_sub_return(count, &q->nr_buf_used) == 0) qperf_inc(q, inbound_queue_full); if (q->irq_ptr->perf_stat_enabled) @@ -527,7 +525,6 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) return count; case SLSB_P_INPUT_ERROR: process_buffer_error(q, start, count); - q->first_to_check = add_buf(start, count); if (atomic_sub_return(count, &q->nr_buf_used) == 0) qperf_inc(q, inbound_queue_full); if (q->irq_ptr->perf_stat_enabled) @@ -547,11 +544,11 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) } } -static int qdio_inbound_q_moved(struct qdio_q *q) +static int qdio_inbound_q_moved(struct qdio_q *q, unsigned int start) { int count; - count = get_inbound_buffer_frontier(q); + count = get_inbound_buffer_frontier(q, start); if (count && !is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR) q->u.in.timestamp = get_tod_clock(); @@ -559,9 +556,8 @@ static int qdio_inbound_q_moved(struct qdio_q *q) return count; } -static inline int qdio_inbound_q_done(struct qdio_q *q) +static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start) { - unsigned int start = q->first_to_check; unsigned char state = 0; if (!atomic_read(&q->nr_buf_used)) @@ -672,17 +668,20 @@ static inline int qdio_tasklet_schedule(struct qdio_q *q) static void __qdio_inbound_processing(struct qdio_q *q) { + unsigned int start = q->first_to_check; int count; qperf_inc(q, tasklet_inbound); - count = qdio_inbound_q_moved(q); + count = qdio_inbound_q_moved(q, start); if (count == 0) return; + start = add_buf(start, count); + q->first_to_check = start; qdio_kick_handler(q, count); - if (!qdio_inbound_q_done(q)) { + if (!qdio_inbound_q_done(q, start)) { /* means poll time is not yet over */ qperf_inc(q, tasklet_inbound_resched); if (!qdio_tasklet_schedule(q)) @@ -694,7 +693,7 @@ static void __qdio_inbound_processing(struct qdio_q *q) * We need to check again to not lose initiative after * resetting the ACK state. 
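Schematically, the calling convention moves from the helpers reading the cursor themselves to the caller owning it. A condensed, illustrative view of the pattern (not verbatim driver code):

  /* before: helper dereferences q->first_to_check internally */
  count = qdio_inbound_q_moved(q);

  /* after: caller reads the cursor once, passes it down, then advances it */
  start = q->first_to_check;
  count = qdio_inbound_q_moved(q, start);
  if (count) {
          q->first_to_check = add_buf(start, count);
          qdio_kick_handler(q, count);
  }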
*/ - if (!qdio_inbound_q_done(q)) { + if (!qdio_inbound_q_done(q, start)) { qperf_inc(q, tasklet_inbound_resched2); qdio_tasklet_schedule(q); } @@ -706,9 +705,8 @@ void qdio_inbound_processing(unsigned long data) __qdio_inbound_processing(q); } -static int get_outbound_buffer_frontier(struct qdio_q *q) +static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start) { - unsigned int start = q->first_to_check; unsigned char state = 0; int count; @@ -740,13 +738,11 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) "out empty:%1d %02x", q->nr, count); atomic_sub(count, &q->nr_buf_used); - q->first_to_check = add_buf(start, count); if (q->irq_ptr->perf_stat_enabled) account_sbals(q, count); return count; case SLSB_P_OUTPUT_ERROR: process_buffer_error(q, start, count); - q->first_to_check = add_buf(start, count); atomic_sub(count, &q->nr_buf_used); if (q->irq_ptr->perf_stat_enabled) account_sbals_error(q, count); @@ -773,11 +769,11 @@ static inline int qdio_outbound_q_done(struct qdio_q *q) return atomic_read(&q->nr_buf_used) == 0; } -static inline int qdio_outbound_q_moved(struct qdio_q *q) +static inline int qdio_outbound_q_moved(struct qdio_q *q, unsigned int start) { int count; - count = get_outbound_buffer_frontier(q); + count = get_outbound_buffer_frontier(q, start); if (count) DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); @@ -829,14 +825,17 @@ retry: static void __qdio_outbound_processing(struct qdio_q *q) { + unsigned int start = q->first_to_check; int count; qperf_inc(q, tasklet_outbound); WARN_ON_ONCE(atomic_read(&q->nr_buf_used) < 0); - count = qdio_outbound_q_moved(q); - if (count) + count = qdio_outbound_q_moved(q, start); + if (count) { + q->first_to_check = add_buf(start, count); qdio_kick_handler(q, count); + } if (queue_type(q) == QDIO_ZFCP_QFMT && !pci_out_supported(q->irq_ptr) && !qdio_outbound_q_done(q)) @@ -891,6 +890,7 @@ static inline void qdio_check_outbound_pci_queues(struct qdio_irq *irq) static void __tiqdio_inbound_processing(struct qdio_q *q) { + unsigned int start = q->first_to_check; int count; qperf_inc(q, tasklet_inbound); @@ -900,13 +900,15 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) /* The interrupt could be caused by a PCI request: */ qdio_check_outbound_pci_queues(q->irq_ptr); - count = qdio_inbound_q_moved(q); + count = qdio_inbound_q_moved(q, start); if (count == 0) return; + start = add_buf(start, count); + q->first_to_check = start; qdio_kick_handler(q, count); - if (!qdio_inbound_q_done(q)) { + if (!qdio_inbound_q_done(q, start)) { qperf_inc(q, tasklet_inbound_resched); if (!qdio_tasklet_schedule(q)) return; @@ -917,7 +919,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) * We need to check again to not lose initiative after * resetting the ACK state. 
*/ - if (!qdio_inbound_q_done(q)) { + if (!qdio_inbound_q_done(q, start)) { qperf_inc(q, tasklet_inbound_resched2); qdio_tasklet_schedule(q); } @@ -1637,7 +1639,7 @@ int qdio_start_irq(struct ccw_device *cdev, int nr) */ if (test_nonshared_ind(irq_ptr)) goto rescan; - if (!qdio_inbound_q_done(q)) + if (!qdio_inbound_q_done(q, q->first_to_check)) goto rescan; return 0; @@ -1668,11 +1670,13 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, { struct qdio_q *q; struct qdio_irq *irq_ptr = cdev->private->qdio_data; + unsigned int start; int count; if (!irq_ptr) return -ENODEV; q = irq_ptr->input_qs[nr]; + start = q->first_to_check; /* * Cannot rely on automatic sync after interrupt since queues may @@ -1683,10 +1687,13 @@ int qdio_get_next_buffers(struct ccw_device *cdev, int nr, int *bufnr, qdio_check_outbound_pci_queues(irq_ptr); - count = qdio_inbound_q_moved(q); + count = qdio_inbound_q_moved(q, start); if (count == 0) return 0; + start = add_buf(start, count); + q->first_to_check = start; + /* Note: upper-layer MUST stop processing immediately here ... */ if (unlikely(q->irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return -EIO; -- GitLab From ec3937107ab43f3e8b2bc9dad95710043c462ff7 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 4 Apr 2019 10:03:13 +0800 Subject: [PATCH 1300/1861] x86/mm/KASLR: Fix the size of the direct mapping section kernel_randomize_memory() uses __PHYSICAL_MASK_SHIFT to calculate the maximum amount of system RAM supported. The size of the direct mapping section is obtained from the smaller one of the below two values: (actual system RAM size + padding size) vs (max system RAM size supported) This calculation is wrong since commit b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52"). In it, __PHYSICAL_MASK_SHIFT was changed to be 52, regardless of whether the kernel is using 4-level or 5-level page tables. Thus, it will always use 4 PB as the maximum amount of system RAM, even in 4-level paging mode where it should actually be 64 TB. Thus, the size of the direct mapping section will always be the sum of the actual system RAM size plus the padding size. Even when the amount of system RAM is 64 TB, the following layout will still be used. Obviously KALSR will be weakened significantly. |____|_______actual RAM_______|_padding_|______the rest_______| 0 64TB ~120TB Instead, it should be like this: |____|_______actual RAM_______|_________the rest______________| 0 64TB ~120TB The size of padding region is controlled by CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING, which is 10 TB by default. The above issue only exists when CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING is set to a non-zero value, which is the case when CONFIG_MEMORY_HOTPLUG is enabled. Otherwise, using __PHYSICAL_MASK_SHIFT doesn't affect KASLR. Fix it by replacing __PHYSICAL_MASK_SHIFT with MAX_PHYSMEM_BITS. [ bp: Massage commit message. ] Fixes: b83ce5ee9147 ("x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52") Signed-off-by: Baoquan He Signed-off-by: Borislav Petkov Reviewed-by: Thomas Garnier Acked-by: Kirill A. Shutemov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Kees Cook Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: frank.ramsay@hpe.com Cc: herbert@gondor.apana.org.au Cc: kirill@shutemov.name Cc: mike.travis@hpe.com Cc: thgarnie@google.com Cc: x86-ml Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190417083536.GE7065@MiWiFi-R3L-srv --- arch/x86/mm/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 3f452ffed7e93..d669c5e797e06 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; - kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT); kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; /* -- GitLab From 0d2cc3b3453254f1c56f9456ba03e092ed4cfb72 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 2 Apr 2019 18:02:41 +0200 Subject: [PATCH 1301/1861] locking/lockdep: Move valid_state() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING valid_state() and print_usage_bug*() functions are not used beyond irq locking correctness checks under CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING. Sadly the "unused function" warning wouldn't fire because valid_state() is inline so the unused case has remained unseen until now. So move them inside the appropriate CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING section. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20190402160244.32434-2-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 34cdcbedda492..9c5819ef4a28a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2784,6 +2784,12 @@ static void check_chain_key(struct task_struct *curr) #endif } +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit); + +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + + static void print_usage_bug_scenario(struct held_lock *lock) { @@ -2853,10 +2859,6 @@ valid_state(struct task_struct *curr, struct held_lock *this, return 1; } -static int mark_lock(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit); - -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * print irq inversion bug: -- GitLab From c902a1e8d9c9b47cd8faa16892710247cdda9b02 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 2 Apr 2019 18:02:42 +0200 Subject: [PATCH 1302/1861] locking/lockdep: Map remaining magic numbers to lock usage mask names Clarify the code with mapping some more constant numbers that haven't been named after their corresponding LOCK_USAGE_* symbol. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20190402160244.32434-3-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9c5819ef4a28a..2288aa2fa4c68 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -516,11 +516,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) { char c = '.'; - if (class->usage_mask & lock_flag(bit + 2)) + if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) c = '+'; if (class->usage_mask & lock_flag(bit)) { c = '-'; - if (class->usage_mask & lock_flag(bit + 2)) + if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) c = '?'; } @@ -1971,7 +1971,10 @@ static const char *state_rnames[] = { static inline const char *state_name(enum lock_usage_bit bit) { - return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2]; + if (bit & LOCK_USAGE_READ_MASK) + return state_rnames[bit >> LOCK_USAGE_DIR_MASK]; + else + return state_names[bit >> LOCK_USAGE_DIR_MASK]; } static int exclusive_bit(int new_bit) @@ -3017,7 +3020,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = { static inline int state_verbose(enum lock_usage_bit bit, struct lock_class *class) { - return state_verbose_f[bit >> 2](class); + return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class); } typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, -- GitLab From 627f364d24c009b61c9199b2c75006e35c294675 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 2 Apr 2019 18:02:43 +0200 Subject: [PATCH 1303/1861] locking/lockdep: Use expanded masks on find_usage_*() functions In order to optimize check_irq_usage() and factorize all the IRQ usage validations we'll need to be able to check multiple lock usage bits at once. Prepare the low level usage mask check functions for that purpose. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20190402160244.32434-4-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2288aa2fa4c68..5e149dd782989 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1682,9 +1682,9 @@ check_redundant(struct lock_list *root, struct lock_class *target, * without creating any illegal irq-safe -> irq-unsafe lock dependency. */ -static inline int usage_match(struct lock_list *entry, void *bit) +static inline int usage_match(struct lock_list *entry, void *mask) { - return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); + return entry->class->usage_mask & *(unsigned long *)mask; } @@ -1700,14 +1700,14 @@ static inline int usage_match(struct lock_list *entry, void *bit) * Return <0 on error. 
*/ static int -find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, +find_usage_forwards(struct lock_list *root, unsigned long usage_mask, struct lock_list **target_entry) { int result; debug_atomic_inc(nr_find_usage_forwards_checks); - result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); + result = __bfs_forwards(root, &usage_mask, usage_match, target_entry); return result; } @@ -1723,14 +1723,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, * Return <0 on error. */ static int -find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, +find_usage_backwards(struct lock_list *root, unsigned long usage_mask, struct lock_list **target_entry) { int result; debug_atomic_inc(nr_find_usage_backwards_checks); - result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); + result = __bfs_backwards(root, &usage_mask, usage_match, target_entry); return result; } @@ -1935,7 +1935,7 @@ check_usage(struct task_struct *curr, struct held_lock *prev, this.parent = NULL; this.class = hlock_class(prev); - ret = find_usage_backwards(&this, bit_backwards, &target_entry); + ret = find_usage_backwards(&this, lock_flag(bit_backwards), &target_entry); if (ret < 0) return print_bfs_bug(ret); if (ret == 1) @@ -1943,7 +1943,7 @@ check_usage(struct task_struct *curr, struct held_lock *prev, that.parent = NULL; that.class = hlock_class(next); - ret = find_usage_forwards(&that, bit_forwards, &target_entry1); + ret = find_usage_forwards(&that, lock_flag(bit_forwards), &target_entry1); if (ret < 0) return print_bfs_bug(ret); if (ret == 1) @@ -2941,7 +2941,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); - ret = find_usage_forwards(&root, bit, &target_entry); + ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); if (ret < 0) return print_bfs_bug(ret); if (ret == 1) @@ -2965,7 +2965,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); - ret = find_usage_backwards(&root, bit, &target_entry); + ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); if (ret < 0) return print_bfs_bug(ret); if (ret == 1) -- GitLab From 8808a7c65423cdd07ea12f9ecd812e56c7857421 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Apr 2019 13:59:03 +0200 Subject: [PATCH 1304/1861] locking/lockdep: Generate LOCKF_ bit composites Instead of open-coding the bitmasks, generate them using the lockdep_states.h header. This prepares for additional states, which would make the manual masks tedious and error prone. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- kernel/locking/lockdep_internals.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index d4c197425f68a..2b3ffd4117ad2 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -42,13 +42,29 @@ enum { __LOCKF(USED) }; -#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) -#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) +#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE | +static const unsigned long LOCKF_ENABLED_IRQ = +#include "lockdep_states.h" + 0; +#undef LOCKDEP_STATE + +#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE | +static const unsigned long LOCKF_USED_IN_IRQ = +#include "lockdep_states.h" + 0; +#undef LOCKDEP_STATE -#define LOCKF_ENABLED_IRQ_READ \ - (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) -#define LOCKF_USED_IN_IRQ_READ \ - (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) +#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ | +static const unsigned long LOCKF_ENABLED_IRQ_READ = +#include "lockdep_states.h" + 0; +#undef LOCKDEP_STATE + +#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ | +static const unsigned long LOCKF_USED_IN_IRQ_READ = +#include "lockdep_states.h" + 0; +#undef LOCKDEP_STATE /* * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text, -- GitLab From 1c6bca6d75bca2cc47b5eafb9f7f16e368ffbeca Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Mon, 15 Apr 2019 14:43:04 +0300 Subject: [PATCH 1305/1861] iwlwifi: don't panic in error path on non-msix systems The driver uses msix causes-register to handle both msix and non msix interrupts when performing sync nmi. On devices that do not support msix this register is unmapped and accessing it causes a kernel panic. Solve this by differentiating the two cases and accessing the proper causes-register in each case. 
Reported-by: Michal Hocko Signed-off-by: Shahar S Matityahu Signed-off-by: Luca Coelho --- .../net/wireless/intel/iwlwifi/pcie/trans.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 79c1dc05f9488..c4375b868901d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3644,20 +3644,27 @@ out_no_pci: void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT; + u32 inta_addr, sw_err_bit; + + if (trans_pcie->msix_enabled) { + inta_addr = CSR_MSIX_HW_INT_CAUSES_AD; + sw_err_bit = MSIX_HW_INT_CAUSES_REG_SW_ERR; + } else { + inta_addr = CSR_INT; + sw_err_bit = CSR_INT_BIT_SW_ERR; + } iwl_disable_interrupts(trans); iwl_force_nmi(trans); while (time_after(timeout, jiffies)) { - u32 inta_hw = iwl_read32(trans, - CSR_MSIX_HW_INT_CAUSES_AD); + u32 inta_hw = iwl_read32(trans, inta_addr); /* Error detected by uCode */ - if (inta_hw & MSIX_HW_INT_CAUSES_REG_SW_ERR) { + if (inta_hw & sw_err_bit) { /* Clear causes register */ - iwl_write32(trans, CSR_MSIX_HW_INT_CAUSES_AD, - inta_hw & - MSIX_HW_INT_CAUSES_REG_SW_ERR); + iwl_write32(trans, inta_addr, inta_hw & sw_err_bit); break; } -- GitLab From 72d3c7bbc9b581e5f2a455e6f399c75626653945 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Mar 2019 14:11:52 +0100 Subject: [PATCH 1306/1861] iwlwifi: mvm: don't attempt debug collection in rfkill If we fail to initialize because rfkill is enabled, then trying to do debug collection currently just fails. Prevent that in the high-level code, although we should probably also fix the lower level code to do things more carefully. It's not 100% clear that it fixes this commit, as the original dump code at the time might've been more careful. In any case, we don't really need to dump anything in this expected scenario. 
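The shape of the fix is a guard on the one expected error code. A minimal stand-alone sketch of that error path, with the firmware-load function stubbed to pretend rfkill is asserted (everything except ERFKILL itself is a stand-in):

#include <errno.h>
#include <stdio.h>

#ifndef ERFKILL
#define ERFKILL 132     /* Linux value; fallback for other libcs */
#endif

/* Stub standing in for iwl_fw_dbg_error_collect(). */
static void dbg_error_collect(void) { puts("collecting fw dump"); }

/* Stub standing in for iwl_mvm_load_rt_fw(): pretend rfkill is on. */
static int load_rt_fw(void) { return -ERFKILL; }

int main(void)
{
        int ret = load_rt_fw();

        if (ret) {
                fprintf(stderr, "Failed to start RT ucode: %d\n", ret);
                /* rfkill is an expected, non-fatal condition: no dump */
                if (ret != -ERFKILL)
                        dbg_error_collect();
        }
        return ret ? 1 : 0;
}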
Signed-off-by: Johannes Berg Fixes: 7125648074e8 ("iwlwifi: add fw dump upon RT ucode start failure") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 00a47f6f1d815..ab68b5d53ec95 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1121,7 +1121,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm) ret = iwl_mvm_load_rt_fw(mvm); if (ret) { IWL_ERR(mvm, "Failed to start RT ucode: %d\n", ret); - iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER); + if (ret != -ERFKILL) + iwl_fw_dbg_error_collect(&mvm->fwrt, + FW_DBG_TRIGGER_DRIVER); goto error; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index ba27dce4c2bbd..13681b03c10e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -834,7 +834,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, mutex_lock(&mvm->mutex); iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); err = iwl_run_init_mvm_ucode(mvm, true); - if (err) + if (err && err != -ERFKILL) iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER); if (!iwlmvm_mod_params.init_dbg || !err) iwl_mvm_stop_device(mvm); -- GitLab From b35f63972c5c67fc0f908286f7fc624137788876 Mon Sep 17 00:00:00 2001 From: Shahar S Matityahu Date: Wed, 20 Mar 2019 17:41:16 +0200 Subject: [PATCH 1307/1861] iwlwifi: dbg_ini: check debug TLV type explicitly In ini debug TLVs bit 24 is set. The driver relies on it in the memory allocation for the debug configuration. This implementation is problematic in case a new debug TLV that is not yet supported is added and uses bit 24. In such a scenario the driver allocates space without using it, which causes errors in the apply point enabling flow. Solve it by explicitly checking whether a given TLV is part of the list of supported ini debug TLVs. 
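The explicit-range idea can be demonstrated with a self-contained filter over a fake TLV stream. The enum values mirror the IWL_UCODE_INI_TLV_GROUP layout from the diff below; everything else is illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define INI_TLV_GROUP 0x1000000

enum tlv_type {
        TLV_BUFFER_ALLOCATION = INI_TLV_GROUP + 0x1,
        TLV_DEBUG_BASE        = TLV_BUFFER_ALLOCATION,
        TLV_HCMD              = INI_TLV_GROUP + 0x2,
        TLV_REGIONS           = INI_TLV_GROUP + 0x3,
        TLV_TRIGGERS          = INI_TLV_GROUP + 0x4,
        TLV_DEBUG_FLOW        = INI_TLV_GROUP + 0x5,
        TLV_DEBUG_MAX         = TLV_DEBUG_FLOW,
};

/* Testing bit 24 alone would also accept unknown future types that happen
 * to set that bit; an explicit [base, max] range rejects them. */
static bool is_supported_debug_tlv(uint32_t type)
{
        return type >= TLV_DEBUG_BASE && type <= TLV_DEBUG_MAX;
}

int main(void)
{
        /* One supported type, one unknown future type, one supported type. */
        uint32_t stream[] = { TLV_HCMD, INI_TLV_GROUP + 0x99, TLV_REGIONS };

        for (size_t i = 0; i < sizeof(stream) / sizeof(stream[0]); i++)
                printf("0x%x -> %s\n", (unsigned)stream[i],
                       is_supported_debug_tlv(stream[i]) ? "handle" : "skip");
        return 0;
}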
Signed-off-by: Shahar S Matityahu Fixes: f14cda6f3b31 ("iwlwifi: trans: parse and store debug ini TLVs") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/file.h | 15 +++++++++------ drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 641c95d03b157..e06407dc088b1 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -93,7 +93,7 @@ struct iwl_ucode_header { } u; }; -#define IWL_UCODE_INI_TLV_GROUP BIT(24) +#define IWL_UCODE_INI_TLV_GROUP 0x1000000 /* * new TLV uCode file layout @@ -148,11 +148,14 @@ enum iwl_ucode_tlv_type { IWL_UCODE_TLV_UMAC_DEBUG_ADDRS = 54, IWL_UCODE_TLV_LMAC_DEBUG_ADDRS = 55, IWL_UCODE_TLV_FW_RECOVERY_INFO = 57, - IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP | 0x1, - IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP | 0x2, - IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP | 0x3, - IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP | 0x4, - IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP | 0x5, + + IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP + 0x1, + IWL_UCODE_TLV_DEBUG_BASE = IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION, + IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP + 0x2, + IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP + 0x3, + IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP + 0x4, + IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP + 0x5, + IWL_UCODE_TLV_DEBUG_MAX = IWL_UCODE_TLV_TYPE_DEBUG_FLOW, /* TLVs 0x1000-0x2000 are for internal driver usage */ IWL_UCODE_TLV_FW_DBG_DUMP_LST = 0x1000, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 5798f434f68fd..c7070760a10aa 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -126,7 +126,8 @@ void iwl_alloc_dbg_tlv(struct iwl_trans *trans, size_t len, const u8 *data, len -= ALIGN(tlv_len, 4); data += sizeof(*tlv) + ALIGN(tlv_len, 4); - if (!(tlv_type & IWL_UCODE_INI_TLV_GROUP)) + if (tlv_type < IWL_UCODE_TLV_DEBUG_BASE || + tlv_type > IWL_UCODE_TLV_DEBUG_MAX) continue; hdr = (void *)&tlv->data[0]; -- GitLab From 3771b0fe9dfc3801eac0142d1af6ba94dee83c6c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 13:57:57 +0100 Subject: [PATCH 1308/1861] locking/lockdep: Avoid bogus Clang warning When lockdep is enabled, and -Wuninitialized warnings are enabled, Clang produces a silly warning for every file we compile: In file included from kernel/sched/fair.c:23: kernel/sched/sched.h:1094:15: error: variable 'cookie' is uninitialized when used here [-Werror,-Wuninitialized] rf->cookie = lockdep_pin_lock(&rq->lock); ^~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/lockdep.h:474:60: note: expanded from macro 'lockdep_pin_lock' #define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; }) ^~~~~~ kernel/sched/sched.h:1094:15: note: variable 'cookie' is declared here include/linux/lockdep.h:474:34: note: expanded from macro 'lockdep_pin_lock' #define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; }) ^ As the 'struct pin_cookie' structure is empty in this configuration, there is no need to initialize it for correctness, but it also does not hurt to set it to an empty structure, so do that to avoid the warning. 
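The warning and its one-character fix hinge on a GNU statement expression returning a (possibly empty) struct. A minimal reproduction, assuming only GNU C extensions as supported by gcc and clang; expanding the "bad" variant in a build with clang -Wuninitialized is what triggers the diagnostic:

/* Build with: clang -Wuninitialized -c pin.c */
struct pin_cookie { };  /* empty when lockdep is disabled */

/* Using this variant triggers: 'cookie' is uninitialized when used here. */
#define pin_lock_bad(l)  ({ struct pin_cookie cookie; cookie; })

/* Silent: an empty initializer costs nothing and satisfies clang. */
#define pin_lock_good(l) ({ struct pin_cookie cookie = { }; cookie; })

int main(void)
{
        int lock;
        struct pin_cookie c = pin_lock_good(&lock);

        (void)c;
        return 0;
}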
Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Cc: Bart Van Assche Cc: Joel Fernandes (Google) Cc: Linus Torvalds Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Thomas Gleixner Cc: Waiman Long Cc: clang-built-linux@googlegroups.com Link: http://lkml.kernel.org/r/20190325125807.1437049-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 79c3873d58acc..21725a91442bf 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -471,7 +471,7 @@ struct pin_cookie { }; #define NIL_COOKIE (struct pin_cookie){ } -#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; }) +#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; }) #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) -- GitLab From 76e1552466ff2da8b909df0fff3600ec1c27edcc Mon Sep 17 00:00:00 2001 From: Arash Fotouhi Date: Fri, 22 Mar 2019 19:28:32 -0700 Subject: [PATCH 1309/1861] watchdog: Fix typo in comment Signed-off-by: Arash Fotouhi Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: loberman@redhat.com Cc: vincent.whitchurch@axis.com Link: http://lkml.kernel.org/r/1553308112-3513-1-git-send-email-arash@arashfotouhi.com Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6a57872331132..7f9e7b9306fe2 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -590,7 +590,7 @@ static void lockup_detector_reconfigure(void) * Create the watchdog thread infrastructure and configure the detector(s). * * The threads are not unparked as watchdog_allowed_mask is empty. When - * the threads are sucessfully initialized, take the proper locks and + * the threads are successfully initialized, take the proper locks and * unpark the threads in the watchdog_cpumask if the watchdog is enabled. */ static __init void lockup_detector_setup(void) -- GitLab From 154d4899e4111ae24e68d6ba955f46856cb046bc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 Mar 2019 10:31:52 +0100 Subject: [PATCH 1310/1861] iwlwifi: mvm: properly check debugfs dentry before using it debugfs can now report an error code if something went wrong instead of just NULL. So if the return value is to be used as a "real" dentry, it needs to be checked if it is an error before dereferencing it. This is now happening because of ff9fb72bc077 ("debugfs: return error values, not NULL"). If multiple iwlwifi devices are in the system, this can cause problems when the driver attempts to create the main debugfs directory again. Later on in the code we fail horribly by trying to dereference a pointer that is an error value. 
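The underlying pattern, error codes encoded in pointer values, can be shown standalone. This sketch re-implements just enough of the kernel's IS_ERR machinery to show why a NULL-only check misses the new failure mode; it is a model, not the kernel's actual err.h, and the stubbed debugfs_create_dir() simply pretends the directory already exists.

#include <stdio.h>

#define MAX_ERRNO 4095
#define EEXIST    17

static inline void *ERR_PTR(long err) { return (void *)err; }

/* Error pointers live in the top MAX_ERRNO values of the address space. */
static inline int IS_ERR_OR_NULL(const void *p)
{
        return !p || (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

/* Model: debugfs now returns ERR_PTR(-EEXIST) instead of NULL on a clash. */
static void *debugfs_create_dir(const char *name)
{
        (void)name;
        return ERR_PTR(-EEXIST);
}

int main(void)
{
        void *dir = debugfs_create_dir("iwlmvm");

        if (IS_ERR_OR_NULL(dir)) {      /* a bare (!dir) check would pass here */
                fprintf(stderr, "Failed to create debugfs directory\n");
                return 1;
        }
        /* ... only now is it safe to dereference dir ... */
        return 0;
}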
Reported-by: Laura Abbott Reported-by: Gabriel Ramirez Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Luca Coelho Cc: Intel Linux Wireless Cc: Kalle Valo Cc: stable # 5.0 Signed-off-by: Greg Kroah-Hartman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 2453ceabf00dc..738eddb2e7acb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -774,6 +774,11 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return; mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir); + if (IS_ERR_OR_NULL(mvmvif->dbgfs_dir)) { + IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", + dbgfs_dir); + return; + } if (!mvmvif->dbgfs_dir) { IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", -- GitLab From c537e07b000bc00c9a5ac9d119ed2c8456a99b6e Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Tue, 16 Apr 2019 21:02:52 +0300 Subject: [PATCH 1311/1861] iwlwifi: cfg: use family 22560 based_params for AX210 family Specifically, max_tfd_queue_size should be 0x10000 like in 22560 family and not 0x100 like in 22000 family. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index eb6defb6d0cd9..0a87d87fbb4f5 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -201,7 +201,7 @@ static const struct iwl_ht_params iwl_22000_ht_params = { #define IWL_DEVICE_AX210 \ IWL_DEVICE_AX200_COMMON, \ .device_family = IWL_DEVICE_FAMILY_AX210, \ - .base_params = &iwl_22000_base_params, \ + .base_params = &iwl_22560_base_params, \ .csr = &iwl_csr_v1, \ .min_txq_size = 128 -- GitLab From 471ba0e686cb13752bc1ff3216c54b69a2d250ea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 9 Apr 2019 19:34:03 +1000 Subject: [PATCH 1312/1861] irq_work: Do not raise an IPI when queueing work on the local CPU The QEMU PowerPC/PSeries machine model was not expecting a self-IPI, and it may be a bit surprising thing to do, so have irq_work_queue_on do local queueing when target is the current CPU. Suggested-by: Steven Rostedt Reported-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Suraj Jitindar Singh Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190409093403.20994-1-npiggin@gmail.com [ Simplified the preprocessor comments. Fixed unbalanced curly brackets pointed out by Thomas. ] Signed-off-by: Ingo Molnar --- kernel/irq_work.c | 75 ++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 6b7cdf17ccf89..73288914ed5e7 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -56,61 +56,70 @@ void __weak arch_irq_work_raise(void) */ } -/* - * Enqueue the irq_work @work on @cpu unless it's already pending - * somewhere. - * - * Can be re-enqueued while the callback is still in progress. 
- */ -bool irq_work_queue_on(struct irq_work *work, int cpu) +/* Enqueue on current CPU, work must already be claimed and preempt disabled */ +static void __irq_work_queue_local(struct irq_work *work) { - /* All work should have been flushed before going offline */ - WARN_ON_ONCE(cpu_is_offline(cpu)); - -#ifdef CONFIG_SMP - - /* Arch remote IPI send/receive backend aren't NMI safe */ - WARN_ON_ONCE(in_nmi()); + /* If the work is "lazy", handle it from next tick if any */ + if (work->flags & IRQ_WORK_LAZY) { + if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && + tick_nohz_tick_stopped()) + arch_irq_work_raise(); + } else { + if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) + arch_irq_work_raise(); + } +} +/* Enqueue the irq work @work on the current CPU */ +bool irq_work_queue(struct irq_work *work) +{ /* Only queue if not already pending */ if (!irq_work_claim(work)) return false; - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) - arch_send_call_function_single_ipi(cpu); - -#else /* #ifdef CONFIG_SMP */ - irq_work_queue(work); -#endif /* #else #ifdef CONFIG_SMP */ + /* Queue the entry and raise the IPI if needed. */ + preempt_disable(); + __irq_work_queue_local(work); + preempt_enable(); return true; } +EXPORT_SYMBOL_GPL(irq_work_queue); -/* Enqueue the irq work @work on the current CPU */ -bool irq_work_queue(struct irq_work *work) +/* + * Enqueue the irq_work @work on @cpu unless it's already pending + * somewhere. + * + * Can be re-enqueued while the callback is still in progress. + */ +bool irq_work_queue_on(struct irq_work *work, int cpu) { +#ifndef CONFIG_SMP + return irq_work_queue(work); + +#else /* CONFIG_SMP: */ + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(cpu)); + /* Only queue if not already pending */ if (!irq_work_claim(work)) return false; - /* Queue the entry and raise the IPI if needed. */ preempt_disable(); - - /* If the work is "lazy", handle it from next tick if any */ - if (work->flags & IRQ_WORK_LAZY) { - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && - tick_nohz_tick_stopped()) - arch_irq_work_raise(); + if (cpu != smp_processor_id()) { + /* Arch remote IPI send/receive backend aren't NMI safe */ + WARN_ON_ONCE(in_nmi()); + if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) + arch_send_call_function_single_ipi(cpu); } else { - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) - arch_irq_work_raise(); + __irq_work_queue_local(work); } - preempt_enable(); return true; +#endif /* CONFIG_SMP */ } -EXPORT_SYMBOL_GPL(irq_work_queue); + bool irq_work_needs_cpu(void) { -- GitLab From 3fe3331bb285700ab2253dbb07f8e478fcea2f1b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 21 Mar 2019 21:15:22 +0000 Subject: [PATCH 1313/1861] perf/x86/amd: Add event map for AMD Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Family 17h differs from prior families by: - Does not support an L2 cache miss event - It has re-enumerated PMC counters for: - L2 cache references - front & back end stalled cycles So we add a new amd_f17h_perfmon_event_map[] so that the generic perf event names will resolve to the correct h/w events on family 17h and above processors. Reference sections 2.1.13.3.3 (stalls) and 2.1.13.3.6 (L2): https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf Signed-off-by: Kim Phillips Cc: # v4.9+ Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pu Wen Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: e40ed1542dd7 ("perf/x86: Add perf support for AMD family-17h processors") [ Improved the formatting a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 0ecfac84ba911..d45f3fbd232ea 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -117,22 +117,39 @@ static __initconst const u64 amd_hw_cache_event_ids }; /* - * AMD Performance Monitor K7 and later. + * AMD Performance Monitor K7 and later, up to and including Family 16h: */ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, - [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ +}; + +/* + * AMD Performance Monitor Family 17h and later: + */ +static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; static u64 amd_pmu_event_map(int hw_event) { + if (boot_cpu_data.x86 >= 0x17) + return amd_f17h_perfmon_event_map[hw_event]; + return amd_perfmon_event_map[hw_event]; } -- GitLab From 3f2552f7e9c5abef2775c53f7af66532f8bf65bc Mon Sep 17 00:00:00 2001 From: Chang-An Chen Date: Fri, 29 Mar 2019 10:59:09 +0800 Subject: [PATCH 1314/1861] timers/sched_clock: Prevent generic sched_clock wrap caused by tick_freeze() tick_freeze() introduced by suspend-to-idle in commit 124cf9117c5f ("PM / sleep: Make it possible to quiesce timers during suspend-to-idle") uses timekeeping_suspend() instead of syscore_suspend() during suspend-to-idle. As a consequence generic sched_clock will keep going because sched_clock_suspend() and sched_clock_resume() are not invoked during suspend-to-idle which can result in a generic sched_clock wrap. On a ARM system with suspend-to-idle enabled, sched_clock is registered as "56 bits at 13MHz, resolution 76ns, wraps every 4398046511101ns", which means the real wrapping duration is 8796093022202ns. [ 134.551779] suspend-to-idle suspend (timekeeping_suspend()) [ 1204.912239] suspend-to-idle resume (timekeeping_resume()) ...... [ 1206.912239] suspend-to-idle suspend (timekeeping_suspend()) [ 5880.502807] suspend-to-idle resume (timekeeping_resume()) ...... 
[ 6000.403724] suspend-to-idle suspend (timekeeping_suspend()) [ 8035.753167] suspend-to-idle resume (timekeeping_resume()) ...... [ 8795.786684] (2)[321:charger_thread]...... [ 8795.788387] (2)[321:charger_thread]...... [ 0.057226] (0)[0:swapper/0]...... [ 0.061447] (2)[0:swapper/2]...... sched_clock was not stopped during suspend-to-idle, and sched_clock_poll hrtimer was not expired because timekeeping_suspend() was invoked during suspend-to-idle. It makes sched_clock wrap at kernel time 8796s. To prevent this, invoke sched_clock_suspend() and sched_clock_resume() in tick_freeze() together with timekeeping_suspend() and timekeeping_resume(). Fixes: 124cf9117c5f (PM / sleep: Make it possible to quiesce timers during suspend-to-idle) Signed-off-by: Chang-An Chen Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Matthias Brugger Cc: John Stultz Cc: Kees Cook Cc: Corey Minyard Cc: Cc: Cc: Stanley Chu Cc: Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1553828349-8914-1-git-send-email-chang-an.chen@mediatek.com --- kernel/time/sched_clock.c | 4 ++-- kernel/time/tick-common.c | 2 ++ kernel/time/timekeeping.h | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 094b82ca95e52..930113b9799ac 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void) return cd.read_data[seq & 1].epoch_cyc; } -static int sched_clock_suspend(void) +int sched_clock_suspend(void) { struct clock_read_data *rd = &cd.read_data[0]; @@ -283,7 +283,7 @@ static int sched_clock_suspend(void) return 0; } -static void sched_clock_resume(void) +void sched_clock_resume(void) { struct clock_read_data *rd = &cd.read_data[0]; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 529143b4c8d2a..df401463a1913 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -487,6 +487,7 @@ void tick_freeze(void) trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); system_state = SYSTEM_SUSPEND; + sched_clock_suspend(); timekeeping_suspend(); } else { tick_suspend_local(); @@ -510,6 +511,7 @@ void tick_unfreeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_resume(); + sched_clock_resume(); system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 7a9b4eb7a1d5b..141ab3ab0354f 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void); extern void timekeeping_warp_clock(void); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +#ifdef CONFIG_GENERIC_SCHED_CLOCK +extern int sched_clock_suspend(void); +extern void sched_clock_resume(void); +#else +static inline int sched_clock_suspend(void) { return 0; } +static inline void sched_clock_resume(void) { } +#endif extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- GitLab From 44427c0fbc09b448b22410978a4ef6ee37599d25 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2019 14:35:19 +0800 Subject: [PATCH 1315/1861] crypto: xts - Fix atomic sleep when walking skcipher When we perform a walk in the completion function, we need to ensure that it is atomic. 
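The idea generalizes: a completion callback runs in atomic (softirq) context, so any request flag that permits sleeping must be dropped before re-entering code that honors it. A hedged stand-alone model of that flag dance follows; the names mirror the diff below, but the crypto API itself is not used and the flag value is made up.

#include <stdbool.h>
#include <stdio.h>

#define REQ_MAY_SLEEP (1u << 0)

struct subreq { unsigned int flags; };
struct rctx   { struct subreq subreq; };

/* Model of xor_tweak_post(): must not sleep if MAY_SLEEP is clear. */
static int xor_tweak_post(struct rctx *rctx)
{
        if (rctx->subreq.flags & REQ_MAY_SLEEP)
                puts("walking with sleeping allowed (OK in process context)");
        else
                puts("walking atomically (required in a completion)");
        return 0;
}

/* The async completion runs in softirq context: clear MAY_SLEEP first. */
static void crypt_done(struct rctx *rctx, int err)
{
        if (!err) {
                rctx->subreq.flags &= ~REQ_MAY_SLEEP;
                err = xor_tweak_post(rctx);
        }
        printf("completed: %d\n", err);
}

int main(void)
{
        struct rctx r = { .subreq = { .flags = REQ_MAY_SLEEP } };

        crypt_done(&r, 0);
        return 0;
}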
Reported-by: syzbot+6f72c20560060c98b566@syzkaller.appspotmail.com Fixes: 78105c7e769b ("crypto: xts - Drop use of auxiliary buffer") Cc: Signed-off-by: Herbert Xu Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/xts.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/xts.c b/crypto/xts.c index 847f54f767897..2f948328cabbd 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } -- GitLab From b257b48cd5830c5b1d0c347eb281f9c28056f881 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2019 14:37:34 +0800 Subject: [PATCH 1316/1861] crypto: lrw - Fix atomic sleep when walking skcipher When we perform a walk in the completion function, we need to ensure that it is atomic. Fixes: ac3c8f36c31d ("crypto: lrw - Do not use auxiliary buffer") Cc: Signed-off-by: Herbert Xu Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/lrw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 0430ccd087286..08a0e458bc3e6 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err) { struct skcipher_request *req = areq->data; - if (!err) + if (!err) { + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = xor_tweak_post(req); + } skcipher_request_complete(req, err); } -- GitLab From 71adf60f0a925c0e0c7dc2d0311fe40b825be737 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:25 +0200 Subject: [PATCH 1317/1861] drm/sun4i: Add missing drm_atomic_helper_shutdown at driver unbind A call to drm_atomic_helper_shutdown is required to properly release the internal references taken by the core and avoid warnings about leaking objects. Add it since it was missing. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-2-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 3ebd9f5e2719d..c5871b15714f6 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -144,6 +145,7 @@ static void sun4i_drv_unbind(struct device *dev) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); drm_dev_put(drm); -- GitLab From 02b92adbe33e6dbd15dc6e32540b22f47c4ff0a2 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:26 +0200 Subject: [PATCH 1318/1861] drm/sun4i: Set device driver data at bind time for use in unbind Our sun4i_drv_unbind gets the drm device using dev_get_drvdata. However, that driver data is never set in sun4i_drv_bind. Set it there to avoid getting a NULL pointer at unbind time. 
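The invariant is simply that dev_set_drvdata() at bind time is what makes dev_get_drvdata() at unbind time valid. A self-contained model of the pairing, with the component framework and the real drm types stubbed away:

#include <stdio.h>
#include <stdlib.h>

struct device     { void *driver_data; };
struct drm_device { const char *name; };

static void dev_set_drvdata(struct device *dev, void *data) { dev->driver_data = data; }
static void *dev_get_drvdata(struct device *dev) { return dev->driver_data; }

static int bind(struct device *dev)
{
        struct drm_device *drm = malloc(sizeof(*drm));

        if (!drm)
                return -1;
        drm->name = "sun4i-drm";
        dev_set_drvdata(dev, drm);      /* without this line, unbind derefs NULL */
        return 0;
}

static void unbind(struct device *dev)
{
        struct drm_device *drm = dev_get_drvdata(dev);

        printf("unbinding %s\n", drm->name);
        free(drm);
}

int main(void)
{
        struct device dev = { 0 };

        if (bind(&dev) == 0)
                unbind(&dev);
        return 0;
}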
Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-3-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index c5871b15714f6..8abaabde08ab5 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -86,6 +86,8 @@ static int sun4i_drv_bind(struct device *dev) ret = -ENOMEM; goto free_drm; } + + dev_set_drvdata(dev, drm); drm->dev_private = drv; INIT_LIST_HEAD(&drv->frontend_list); INIT_LIST_HEAD(&drv->engine_list); -- GitLab From f5a9ed867c83875546c9aadd4ed8e785e9adcc3c Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 18 Apr 2019 15:27:27 +0200 Subject: [PATCH 1319/1861] drm/sun4i: Fix component unbinding and component master deletion For our component-backed driver to be properly removed, we need to delete the component master in sun4i_drv_remove and make sure to call component_unbind_all in the master's unbind so that all components are unbound when the master is. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190418132727.5128-4-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 8abaabde08ab5..843b86661833e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -151,6 +151,8 @@ static void sun4i_drv_unbind(struct device *dev) drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); drm_dev_put(drm); + + component_unbind_all(dev, NULL); } static const struct component_master_ops sun4i_drv_master_ops = { @@ -399,6 +401,8 @@ static int sun4i_drv_probe(struct platform_device *pdev) static int sun4i_drv_remove(struct platform_device *pdev) { + component_master_del(&pdev->dev, &sun4i_drv_master_ops); + return 0; } -- GitLab From e757e7fa3a93afa54a1bf31953f458b0005e0910 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 16 Apr 2019 12:23:05 -0400 Subject: [PATCH 1320/1861] PM / Domains: remove unnecessary unlikely() WARN_ON() already contains an unlikely(), so it's not necessary to use unlikely. Signed-off-by: Yangtao Li Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c8..598a4e02aee19 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -391,11 +391,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) if (unlikely(!genpd->set_performance_state)) return -EINVAL; - if (unlikely(!dev->power.subsys_data || - !dev->power.subsys_data->domain_data)) { - WARN_ON(1); + if (WARN_ON(!dev->power.subsys_data || + !dev->power.subsys_data->domain_data)) return -EINVAL; - } genpd_lock(genpd); -- GitLab From 0fcc2bdc8aff6e7feb3222930edb78b4b820cd3e Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 2 Apr 2019 13:30:37 +0300 Subject: [PATCH 1321/1861] device property: Add fwnode_graph_get_endpoint_by_id() fwnode_graph_get_endpoint_by_id() is intended for obtaining local endpoints by a given local port. 
fwnode_graph_get_endpoint_by_id() is slightly different from its OF counterpart, of_graph_get_endpoint_by_regs(): instead of using -1 as a value to indicate that a port or an endpoint number does not matter, it uses flags to look for equal or greater endpoint. The port number is always fixed. It also returns only remote endpoints that belong to an available device, a behaviour that can be turned off with a flag. Signed-off-by: Sakari Ailus [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 75 ++++++++++++++++++++++++++++++++++++++++ include/linux/property.h | 18 ++++++++++ 2 files changed, 93 insertions(+) diff --git a/drivers/base/property.c b/drivers/base/property.c index 8b91ab380d140..348b37e64944c 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -983,6 +983,81 @@ fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port_id, } EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_node); +/** + * fwnode_graph_get_endpoint_by_id - get endpoint by port and endpoint numbers + * @fwnode: parent fwnode_handle containing the graph + * @port: identifier of the port node + * @endpoint: identifier of the endpoint node under the port node + * @flags: fwnode lookup flags + * + * Return the fwnode handle of the local endpoint corresponding the port and + * endpoint IDs or NULL if not found. + * + * If FWNODE_GRAPH_ENDPOINT_NEXT is passed in @flags and the specified endpoint + * has not been found, look for the closest endpoint ID greater than the + * specified one and return the endpoint that corresponds to it, if present. + * + * Do not return endpoints that belong to disabled devices, unless + * FWNODE_GRAPH_DEVICE_DISABLED is passed in @flags. + * + * The returned endpoint needs to be released by calling fwnode_handle_put() on + * it when it is not needed any more. + */ +struct fwnode_handle * +fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, + u32 port, u32 endpoint, unsigned long flags) +{ + struct fwnode_handle *ep = NULL, *best_ep = NULL; + unsigned int best_ep_id = 0; + bool endpoint_next = flags & FWNODE_GRAPH_ENDPOINT_NEXT; + bool enabled_only = !(flags & FWNODE_GRAPH_DEVICE_DISABLED); + + while ((ep = fwnode_graph_get_next_endpoint(fwnode, ep))) { + struct fwnode_endpoint fwnode_ep = { 0 }; + int ret; + + if (enabled_only) { + struct fwnode_handle *dev_node; + bool available; + + dev_node = fwnode_graph_get_remote_port_parent(ep); + available = fwnode_device_is_available(dev_node); + fwnode_handle_put(dev_node); + if (!available) + continue; + } + + ret = fwnode_graph_parse_endpoint(ep, &fwnode_ep); + if (ret < 0) + continue; + + if (fwnode_ep.port != port) + continue; + + if (fwnode_ep.id == endpoint) + return ep; + + if (!endpoint_next) + continue; + + /* + * If the endpoint that has just been found is not the first + * matching one and the ID of the one found previously is closer + * to the requested endpoint ID, skip it. 
+ */ + if (fwnode_ep.id < endpoint || + (best_ep && best_ep_id < fwnode_ep.id)) + continue; + + fwnode_handle_put(best_ep); + best_ep = fwnode_handle_get(ep); + best_ep_id = fwnode_ep.id; + } + + return best_ep; +} +EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id); + /** * fwnode_graph_parse_endpoint - parse common endpoint node properties * @fwnode: pointer to endpoint fwnode_handle diff --git a/include/linux/property.h b/include/linux/property.h index 65d3420dd5d18..a29369c89e6ef 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -13,6 +13,7 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include #include #include @@ -304,6 +305,23 @@ struct fwnode_handle * fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port, u32 endpoint); +/* + * Fwnode lookup flags + * + * @FWNODE_GRAPH_ENDPOINT_NEXT: In the case of no exact match, look for the + * closest endpoint ID greater than the specified + * one. + * @FWNODE_GRAPH_DEVICE_DISABLED: That the device to which the remote + * endpoint of the given endpoint belongs to, + * may be disabled. + */ +#define FWNODE_GRAPH_ENDPOINT_NEXT BIT(0) +#define FWNODE_GRAPH_DEVICE_DISABLED BIT(1) + +struct fwnode_handle * +fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, + u32 port, u32 endpoint, unsigned long flags); + #define fwnode_graph_for_each_endpoint(fwnode, child) \ for (child = NULL; \ (child = fwnode_graph_get_next_endpoint(fwnode, child)); ) -- GitLab From c8afd03486c26accdda4846e5561aa3f8e862a9d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 18 Apr 2019 13:39:33 +0200 Subject: [PATCH 1322/1861] ACPI / LPSS: Use acpi_lpss_* instead of acpi_subsys_* functions for hibernate Commit 48402cee6889 ("ACPI / LPSS: Resume BYT/CHT I2C controllers from resume_noirq") makes acpi_lpss_{suspend_late,resume_early}() bail early on BYT/CHT as resume_from_noirq is set. This means that on resume from hibernate dw_i2c_plat_resume() doesn't get called by the restore_early callback, acpi_lpss_resume_early(). Instead it should be called by the restore_noirq callback matching how things are done when resume_from_noirq is set and we are doing a regular resume. Change the restore_noirq callback to acpi_lpss_resume_noirq so that dw_i2c_plat_resume() gets properly called when resume_from_noirq is set and we are resuming from hibernate. Likewise also change the poweroff_noirq callback so that dw_i2c_plat_suspend gets called properly. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202139 Fixes: 48402cee6889 ("ACPI / LPSS: Resume BYT/CHT I2C controllers from resume_noirq") Reported-by: Kai-Heng Feng Signed-off-by: Hans de Goede Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpi_lpss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 1e2a10a06b9dc..cf768608437e1 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -1142,8 +1142,8 @@ static struct dev_pm_domain acpi_lpss_pm_domain = { .thaw_noirq = acpi_subsys_thaw_noirq, .poweroff = acpi_subsys_suspend, .poweroff_late = acpi_lpss_suspend_late, - .poweroff_noirq = acpi_subsys_suspend_noirq, - .restore_noirq = acpi_subsys_resume_noirq, + .poweroff_noirq = acpi_lpss_suspend_noirq, + .restore_noirq = acpi_lpss_resume_noirq, .restore_early = acpi_lpss_resume_early, #endif .runtime_suspend = acpi_lpss_runtime_suspend, -- GitLab From 738a7832d21e3d911fcddab98ce260b79010b461 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Apr 2019 12:18:39 +0200 Subject: [PATCH 1323/1861] signal: use fdget() since we don't allow O_PATH As stated in the original commit for pidfd_send_signal() we don't allow signaling processes through O_PATH file descriptors since it is semantically equivalent to a write on the pidfd. We already correctly error out right now and return EBADF if an O_PATH fd is passed. This is because we use file->f_op to detect whether a pidfd is passed and O_PATH fds have their file->f_op set to empty_fops in do_dentry_open() and thus fail the test. Thus, there is no regression. It's just semantically correct to use fdget() and return an error right from there instead of taking a reference and returning an error later. Signed-off-by: Christian Brauner Acked-by: Oleg Nesterov Cc: Arnd Bergmann Cc: "Eric W. Biederman" Cc: Kees Cook Cc: Thomas Gleixner Cc: Jann Horn Cc: David Howells Cc: "Michael Kerrisk (man-pages)" Cc: Andy Lutomirsky Cc: Andrew Morton Cc: Oleg Nesterov Cc: Aleksa Sarai Cc: Al Viro Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index f98448cf2defb..227ba170298e5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3581,7 +3581,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (flags) return -EINVAL; - f = fdget_raw(pidfd); + f = fdget(pidfd); if (!f.file) return -EBADF; -- GitLab From c09d65d9eab69985c75f98ed64541229f6fa9aa6 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Tue, 16 Apr 2019 20:36:34 +0300 Subject: [PATCH 1324/1861] KVM: x86: Consider LAPIC TSC-Deadline timer expired if deadline too short If the guest sets MSR_IA32_TSCDEADLINE to a value such that, in the host time-domain, it is closer than lapic_timer_advance_ns, we can end up calling hrtimer_start() with an expiration time in the past. Because lapic_timer.timer is initialized with HRTIMER_MODE_ABS_PINNED, it is not allowed to run in softirq and therefore will never expire. To avoid such a scenario, verify that the deadline expiration time, in the host time-domain, is further out than (now + lapic_timer_advance_ns). A future patch can also consider adding a min_timer_deadline_ns module parameter, similar to min_timer_period_us, to avoid races where the nanoseconds it takes to run this logic could still cause hrtimer_start() to be called with an expiration time in the past. 
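Numerically, the fix moves the conversion to nanoseconds ahead of the comparison so both conditions can be checked in host time. A sketch of the arithmetic with made-up TSC numbers (the frequency, deadline, and advance values are illustrative, not driver defaults):

#include <stdint.h>
#include <stdio.h>

/* Convert a TSC delta to nanoseconds, as in start_sw_tscdeadline(). */
static uint64_t tsc_delta_to_ns(uint64_t deadline, uint64_t now_tsc,
                                uint64_t tsc_khz)
{
        return (deadline - now_tsc) * 1000000ULL / tsc_khz;
}

int main(void)
{
        uint64_t tsc_khz   = 2600000;   /* 2.6 GHz guest TSC, illustrative */
        uint64_t guest_tsc = 1000000;
        uint64_t deadline  = 1002600;   /* ~1000 ns in the future */
        uint64_t advance_ns = 1500;     /* tuned timer advance */

        uint64_t ns = tsc_delta_to_ns(deadline, guest_tsc, tsc_khz);

        /* Arm the hrtimer only if the deadline is in the future AND far
         * enough out that subtracting the advance cannot land in the
         * past; otherwise treat the timer as already expired. */
        if (deadline > guest_tsc && ns > advance_ns)
                printf("hrtimer_start at now + %llu ns\n",
                       (unsigned long long)(ns - advance_ns));
        else
                puts("expire immediately");
        return 0;
}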
Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9bf70cf845648..6cb990dbc15a0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1542,9 +1542,12 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) now = ktime_get(); guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - if (likely(tscdeadline > guest_tsc)) { - ns = (tscdeadline - guest_tsc) * 1000000ULL; - do_div(ns, this_tsc_khz); + + ns = (tscdeadline - guest_tsc) * 1000000ULL; + do_div(ns, this_tsc_khz); + + if (likely(tscdeadline > guest_tsc) && + likely(ns > lapic_timer_advance_ns)) { expire = ktime_add_ns(now, ns); expire = ktime_sub_ns(expire, lapic_timer_advance_ns); hrtimer_start(&apic->lapic_timer.timer, -- GitLab From da66761c2d93a46270d69001abb5692717495a68 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 20 Mar 2019 18:43:20 +0100 Subject: [PATCH 1325/1861] x86: kvm: hyper-v: deal with buggy TLB flush requests from WS2012 It was reported that with some special Multi Processor Group configuration, e.g: bcdedit.exe /set groupsize 1 bcdedit.exe /set maxgroup on bcdedit.exe /set groupaware on for a 16-vCPU guest WS2012 shows BSOD on boot when PV TLB flush mechanism is in use. Tracing kvm_hv_flush_tlb immediately reveals the issue: kvm_hv_flush_tlb: processor_mask 0x0 address_space 0x0 flags 0x2 The only flag set in this request is HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES, however, processor_mask is 0x0 and no HV_FLUSH_ALL_PROCESSORS is specified. We don't flush anything and apparently it's not what Windows expects. TLFS doesn't say anything about such requests and newer Windows versions seem to be unaffected. This all feels like a WS2012 bug, which is, however, easy to workaround in KVM: let's flush everything when we see an empty flush request, over-flushing doesn't hurt. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 421899f6ad7bf..cc24b3a32c449 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, valid_bank_mask = BIT_ULL(0); sparse_banks[0] = flush.processor_mask; - all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; + + /* + * Work around possible WS2012 bug: it sends hypercalls + * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear, + * while also expecting us to flush something and crashing if + * we don't. Let's treat processor_mask == 0 same as + * HV_FLUSH_ALL_PROCESSORS. + */ + all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || + flush.processor_mask == 0; } else { if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, sizeof(flush_ex)))) -- GitLab From 57bf67e73ce9bcce2258890f5abf2adf5f619f1a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:31 -0700 Subject: [PATCH 1326/1861] KVM: lapic: Disable timer advancement if adaptive tuning goes haywire To minimize the latency of timer interrupts as observed by the guest, KVM adjusts the values it programs into the host timers to account for the host's overhead of programming and handling the timer event. Now that the timer advancement is automatically tuned during runtime, it's effectively unbounded by default, e.g. 
if KVM is running as L1 the advancement can measure in hundreds of milliseconds. Disable timer advancement if adaptive tuning yields an advancement of more than 5000ns, as large advancements can break reasonable assumptions of the guest, e.g. that a timer configured to fire after 1ms won't arrive on the next instruction. Although KVM busy waits to mitigate the case of a timer event arriving too early, complications can arise when shifting the interrupt too far, e.g. kvm-unit-test's vmx.interrupt test will fail when its "host" exits on interrupts as KVM may inject the INTR before the guest executes STI+HLT. Arguably the unit test is "broken" in the sense that delaying a timer interrupt by 1ms doesn't technically guarantee the interrupt will arrive after STI+HLT, but it's a reasonable assumption that KVM should support. Furthermore, an unbounded advancement also effectively unbounds the time spent busy waiting, e.g. if the guest programs a timer with a very large delay. 5000ns is a somewhat arbitrary threshold. When running on bare metal, which is the intended use case, timer advancement is expected to be in the general vicinity of 1000ns. 5000ns is high enough that false positives are unlikely, while not being so high as to negatively affect the host's performance/stability. Note, a future patch will enable userspace to disable KVM's adaptive tuning, which will allow privileged userspace to specify an advancement value in excess of this arbitrary threshold in order to satisfy an abnormal use case. Cc: Liran Alon Cc: Wanpeng Li Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6cb990dbc15a0..bbe1402309e3b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1522,6 +1522,10 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) } if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) lapic_timer_advance_adjust_done = true; + if (unlikely(lapic_timer_advance_ns > 5000)) { + lapic_timer_advance_ns = 0; + lapic_timer_advance_adjust_done = true; + } } } -- GitLab From 39497d7660d9866a47a2dc9055672358da57ad3d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:32 -0700 Subject: [PATCH 1327/1861] KVM: lapic: Track lapic timer advance per vCPU Automatically adjusting the globally-shared timer advancement could corrupt the timer, e.g. if multiple vCPUs are concurrently adjusting the advancement value. That could be partially fixed by using a local variable for the arithmetic, but it would still be susceptible to a race when setting timer_advance_adjust_done. And because virtual_tsc_khz and tsc_scaling_ratio are per-vCPU, the correct calibration for a given vCPU may not apply to all vCPUs. Furthermore, lapic_timer_advance_ns is marked __read_mostly, which is effectively violated when finding a stable advancement takes an extended amount of time. Opportunistically change the definition of lapic_timer_advance_ns to a u32 so that it matches the style of struct kvm_timer. Explicitly pass the param to kvm_create_lapic() so that it doesn't have to be exposed to lapic.c, thus reducing the probability of unintentionally using the global value instead of the per-vCPU value. 
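The adaptive step-and-clamp logic lends itself to a stand-alone simulation. This sketch keeps the advance value per "vCPU", as the patch below does, and mirrors the adjust/clamp constants from the diffs; the measured timing errors are synthetic.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ADJUST_DONE 100         /* ns: close enough, stop tuning */
#define ADJUST_STEP 8           /* divide the correction to damp swings */
#define ADVANCE_MAX 5000        /* ns: beyond this, give up and disable */

struct vcpu_timer {             /* per-vCPU state, as in struct kvm_timer */
        uint32_t advance_ns;
        int done;
};

static void tune(struct vcpu_timer *t, int64_t error_ns)
{
        uint32_t step;

        if (t->done)
                return;
        step = (uint32_t)llabs(error_ns);
        if (step > t->advance_ns / ADJUST_STEP)
                step = t->advance_ns / ADJUST_STEP;
        if (error_ns > 0)               /* fired too early: advance less */
                t->advance_ns -= step;
        else                            /* fired too late: advance more */
                t->advance_ns += step;
        if (llabs(error_ns) < ADJUST_DONE)
                t->done = 1;
        if (t->advance_ns > ADVANCE_MAX) {      /* runaway, e.g. nested */
                t->advance_ns = 0;
                t->done = 1;
        }
}

int main(void)
{
        struct vcpu_timer t = { .advance_ns = 1000 };
        int64_t samples[] = { -400, -150, 90 }; /* synthetic errors (ns) */

        for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                tune(&t, samples[i]);
                printf("advance=%u done=%d\n", t.advance_ns, t.done);
        }
        return 0;
}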
Cc: Liran Alon Cc: Wanpeng Li Reviewed-by: Liran Alon Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 36 +++++++++++++++++++----------------- arch/x86/kvm/lapic.h | 4 +++- arch/x86/kvm/vmx/vmx.c | 4 +++- arch/x86/kvm/x86.c | 7 +++---- arch/x86/kvm/x86.h | 2 -- 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbe1402309e3b..7decd58c9cea6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -70,7 +70,6 @@ #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul -static bool lapic_timer_advance_adjust_done = false; #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -1485,6 +1484,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) void wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; + u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 guest_tsc, tsc_deadline, ns; if (!lapic_in_kernel(vcpu)) @@ -1504,34 +1504,36 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) __delay(min(tsc_deadline - guest_tsc, - nsec_to_cycles(vcpu, lapic_timer_advance_ns))); + nsec_to_cycles(vcpu, timer_advance_ns))); - if (!lapic_timer_advance_adjust_done) { + if (!apic->lapic_timer.timer_advance_adjust_done) { /* too early */ if (guest_tsc < tsc_deadline) { ns = (tsc_deadline - guest_tsc) * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - lapic_timer_advance_ns -= min((unsigned int)ns, - lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns -= min((u32)ns, + timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); } else { /* too late */ ns = (guest_tsc - tsc_deadline) * 1000000ULL; do_div(ns, vcpu->arch.virtual_tsc_khz); - lapic_timer_advance_ns += min((unsigned int)ns, - lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + timer_advance_ns += min((u32)ns, + timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); } if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) - lapic_timer_advance_adjust_done = true; - if (unlikely(lapic_timer_advance_ns > 5000)) { - lapic_timer_advance_ns = 0; - lapic_timer_advance_adjust_done = true; + apic->lapic_timer.timer_advance_adjust_done = true; + if (unlikely(timer_advance_ns > 5000)) { + timer_advance_ns = 0; + apic->lapic_timer.timer_advance_adjust_done = true; } + apic->lapic_timer.timer_advance_ns = timer_advance_ns; } } static void start_sw_tscdeadline(struct kvm_lapic *apic) { - u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; + struct kvm_timer *ktimer = &apic->lapic_timer; + u64 guest_tsc, tscdeadline = ktimer->tscdeadline; u64 ns = 0; ktime_t expire; struct kvm_vcpu *vcpu = apic->vcpu; @@ -1551,11 +1553,10 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) do_div(ns, this_tsc_khz); if (likely(tscdeadline > guest_tsc) && - likely(ns > lapic_timer_advance_ns)) { + likely(ns > apic->lapic_timer.timer_advance_ns)) { expire = ktime_add_ns(now, ns); - expire = ktime_sub_ns(expire, lapic_timer_advance_ns); - hrtimer_start(&apic->lapic_timer.timer, - expire, HRTIMER_MODE_ABS_PINNED); + expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); + hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED); } else 
apic_timer_expired(apic); @@ -2262,7 +2263,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } -int kvm_create_lapic(struct kvm_vcpu *vcpu) +int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns) { struct kvm_lapic *apic; @@ -2286,6 +2287,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; + apic->lapic_timer.timer_advance_ns = timer_advance_ns; /* * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index ff6ef9c3d760c..3e97f8a68967b 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -31,8 +31,10 @@ struct kvm_timer { u32 timer_mode_mask; u64 tscdeadline; u64 expired_tscdeadline; + u32 timer_advance_ns; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; + bool timer_advance_adjust_done; }; struct kvm_lapic { @@ -62,7 +64,7 @@ struct kvm_lapic { struct dest_map; -int kvm_create_lapic(struct kvm_vcpu *vcpu); +int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b4e7d645275a2..bcdd69d90a8c3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7032,6 +7032,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) { struct vcpu_vmx *vmx; u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; + struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; if (kvm_mwait_in_guest(vcpu->kvm)) return -EOPNOTSUPP; @@ -7040,7 +7041,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) tscl = rdtsc(); guest_tscl = kvm_read_l1_tsc(vcpu, tscl); delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; - lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns); + lapic_timer_advance_cycles = nsec_to_cycles(vcpu, + ktimer->timer_advance_ns); if (delta_tsc > lapic_timer_advance_cycles) delta_tsc -= lapic_timer_advance_cycles; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a0d1fc80ac5a8..aa26a3cfc765d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -137,9 +137,8 @@ static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int __read_mostly lapic_timer_advance_ns = 1000; +static u32 __read_mostly lapic_timer_advance_ns = 1000; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); -EXPORT_SYMBOL_GPL(lapic_timer_advance_ns); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); @@ -7873,7 +7872,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - if (lapic_timer_advance_ns) + if (vcpu->arch.apic->lapic_timer.timer_advance_ns) wait_lapic_expire(vcpu); guest_enter_irqoff(); @@ -9061,7 +9060,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) if (irqchip_in_kernel(vcpu->kvm)) { vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); - r = kvm_create_lapic(vcpu); + r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; } else diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index aedc5d0d4989b..534d3f28bb01a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -294,8 +294,6 @@ extern u64 
kvm_supported_xcr0(void); extern unsigned int min_timer_period_us; -extern unsigned int lapic_timer_advance_ns; - extern bool enable_vmware_backdoor; extern struct static_key kvm_no_apic_vcpu; -- GitLab From c3941d9e0ccd48920e4811f133235b3597e5310b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:33 -0700 Subject: [PATCH 1328/1861] KVM: lapic: Allow user to disable adaptive tuning of timer advancement The introduction of adaptive tuning of lapic timer advancement did not allow for the scenario where userspace would want to disable adaptive tuning but still employ timer advancement, e.g. for testing purposes or to handle a use case where adaptive tuning is unable to settle on a suitable time. This is especially pertinent now that KVM places a hard threshold on the maximum advancement time. Rework the timer semantics to accept signed values, with a value of '-1' being interpreted as "use adaptive tuning with KVM's internal default", and any other value being used as an explicit advancement time, e.g. a time of '0' effectively disables advancement. Note, this does not completely restore the original behavior of lapic_timer_advance_ns. Prior to tracking the advancement per vCPU, which is necessary to support autotuning, userspace could adjust lapic_timer_advance_ns for a *running* vCPU. With per-vCPU tracking, the module params are snapshotted at vCPU creation, i.e. applying a new advancement effectively requires restarting a VM. Dynamically updating a running vCPU is possible, e.g. a helper could be added to retrieve the desired delay, choosing between the global module param and the per-VCPU value depending on whether or not auto-tuning is (globally) enabled, but this introduces a great deal of complexity. The wrapper itself is not complex, but understanding and documenting the effects of dynamically toggling auto-tuning and/or adjusting the timer advancement is nigh impossible since the behavior would be dependent on KVM's implementation as well as compiler optimizations. In other words, providing stable behavior would require extremely careful consideration now and in the future. Given that the expected use of a manually-tuned timer advancement is to "tune once, run many", use the vastly simpler approach of recognizing changes to the module params only when creating a new vCPU.
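The parameter semantics just described can be condensed into a short sketch. This mirrors the kvm_create_lapic() hunk in this patch, but it is a simplified illustration rather than the verbatim kernel code:

/* Simplified sketch of the '-1' / explicit-value semantics. */
static int lapic_timer_advance_ns = -1;		/* module param, read at vCPU creation */

static void snapshot_timer_advance(struct kvm_timer *ktimer, int param)
{
	if (param == -1) {
		ktimer->timer_advance_ns = 1000;	/* adaptive tuning from the default */
		ktimer->timer_advance_adjust_done = false;
	} else {
		ktimer->timer_advance_ns = param;	/* fixed; '0' disables advancement */
		ktimer->timer_advance_adjust_done = true;
	}
}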
Cc: Liran Alon Cc: Wanpeng Li Reviewed-by: Liran Alon Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 11 +++++++++-- arch/x86/kvm/lapic.h | 2 +- arch/x86/kvm/x86.c | 9 +++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7decd58c9cea6..12f2f7dc121fb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2263,7 +2263,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } -int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns) +int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) { struct kvm_lapic *apic; @@ -2287,7 +2287,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; - apic->lapic_timer.timer_advance_ns = timer_advance_ns; + if (timer_advance_ns == -1) { + apic->lapic_timer.timer_advance_ns = 1000; + apic->lapic_timer.timer_advance_adjust_done = false; + } else { + apic->lapic_timer.timer_advance_ns = timer_advance_ns; + apic->lapic_timer.timer_advance_adjust_done = true; + } + /* * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 3e97f8a68967b..d6d049ba30452 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -64,7 +64,7 @@ struct kvm_lapic { struct dest_map; -int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns); +int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns); void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index aa26a3cfc765d..9482cb36b92ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -136,8 +136,13 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); -/* lapic timer advance (tscdeadline mode only) in nanoseconds */ -static u32 __read_mostly lapic_timer_advance_ns = 1000; +/* + * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables + * adaptive tuning starting from default advancment of 1000ns. '0' disables + * advancement entirely. Any other value is used as-is and disables adaptive + * tuning, i.e. allows priveleged userspace to set an exact advancement time. + */ +static int __read_mostly lapic_timer_advance_ns = -1; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; -- GitLab From b6aa57c69cb26ea0160c51f7cf45f1af23542686 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 17 Apr 2019 10:15:34 -0700 Subject: [PATCH 1329/1861] KVM: lapic: Convert guest TSC to host time domain if necessary To minimize the latency of timer interrupts as observed by the guest, KVM adjusts the values it programs into the host timers to account for the host's overhead of programming and handling the timer event. In the event that the adjustments are too aggressive, i.e. the timer fires earlier than the guest expects, KVM busy waits immediately prior to entering the guest. Currently, KVM manually converts the delay from nanoseconds to clock cycles. 
But, the conversion is done in the guest's time domain, while the delay occurs in the host's time domain. This is perfectly ok when the guest and host are using the same TSC ratio, but if the guest is using a different ratio then the delay may not be accurate and could wait too little or too long. When the guest is not using the host's ratio, convert the delay from guest clock cycles to host nanoseconds and use ndelay() instead of __delay() to provide more accurate timing. Because converting to nanoseconds is relatively expensive, e.g. requires division and more multiplication ops, continue using __delay() directly when guest and host TSCs are running at the same ratio. Cc: Liran Alon Cc: Wanpeng Li Cc: stable@vger.kernel.org Fixes: 3b8a5df6c4dc6 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 12f2f7dc121fb..e0fa6fc0b2d89 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1481,6 +1481,26 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) return false; } +static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) +{ + u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; + + /* + * If the guest TSC is running at a different ratio than the host, then + * convert the delay to nanoseconds to achieve an accurate delay. Note + * that __delay() uses delay_tsc whenever the hardware has TSC, thus + * always for VMX enabled hardware. + */ + if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) { + __delay(min(guest_cycles, + nsec_to_cycles(vcpu, timer_advance_ns))); + } else { + u64 delay_ns = guest_cycles * 1000000ULL; + do_div(delay_ns, vcpu->arch.virtual_tsc_khz); + ndelay(min_t(u32, delay_ns, timer_advance_ns)); + } +} + void wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1501,10 +1521,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); - /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(min(tsc_deadline - guest_tsc, - nsec_to_cycles(vcpu, timer_advance_ns))); + __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); if (!apic->lapic_timer.timer_advance_adjust_done) { /* too early */ -- GitLab From ff8acf929014b7f87315588e0daf8597c8aa9d1c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Apr 2019 00:21:21 -0700 Subject: [PATCH 1330/1861] arm64: futex: Restore oldval initialization to work around buggy compilers Commit 045afc24124d ("arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value") removed oldval's zero initialization in arch_futex_atomic_op_inuser because it is not necessary. Unfortunately, Android's arm64 GCC 4.9.4 [1] does not agree: ../kernel/futex.c: In function 'do_futex': ../kernel/futex.c:1658:17: warning: 'oldval' may be used uninitialized in this function [-Wmaybe-uninitialized] return oldval == cmparg; ^ In file included from ../kernel/futex.c:73:0: ../arch/arm64/include/asm/futex.h:53:6: note: 'oldval' was declared here int oldval, ret, tmp; ^ GCC fails to follow that when ret is non-zero, futex_atomic_op_inuser returns right away, avoiding the uninitialized use that it claims. 
Restoring the zero initialization works around this issue. [1]: https://android.googlesource.com/platform/prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/ Cc: stable@vger.kernel.org Fixes: 045afc24124d ("arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value") Reviewed-by: Greg Kroah-Hartman Signed-off-by: Nathan Chancellor Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index e1d95f08f8e12..c7e1a7837706c 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -50,7 +50,7 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval, ret, tmp; + int oldval = 0, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); -- GitLab From f476b3f809fa02f47af6333ed63715058c3fc348 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 18 Apr 2019 07:14:13 +0000 Subject: [PATCH 1331/1861] mlxsw: spectrum: Put MC TCs into DWRR mode Both Spectrum-1 and Spectrum-2 chips are currently configured such that pairs of TC n (which is used for UC traffic) and TC n+8 (which is used for MC traffic) are feeding into the same subgroup. Strict prioritization is configured between the two TCs, and by enabling MC-aware mode on the switch, the lower-numbered (UC) TCs are favored over the higher-numbered (MC) TCs. On Spectrum-2 however, there is an issue in configuration of the MC-aware mode. As a result, MC traffic is prioritized over UC traffic. To work around the issue, configure the MC TCs with DWRR mode (while keeping the UC TCs in strict mode). With this patch, the multicast-unicast arbitration results in the same behavior on both Spectrum-1 and Spectrum-2 chips. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9eb63300c1d3a..3745ea194632c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3316,7 +3316,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) err = mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_TC, i + 8, i, - false, 0); + true, 100); if (err) return err; } -- GitLab From 1ab3030193d25878b3b1409060e1e0a879800c95 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2019 07:14:14 +0000 Subject: [PATCH 1332/1861] mlxsw: pci: Reincrease PCI reset timeout During driver initialization the driver sends a reset to the device and waits for the firmware to signal that it is ready to continue. Commit d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout") increased the timeout to 13 seconds due to longer PHY calibration in Spectrum-2 compared to Spectrum-1. Recently it became apparent that this timeout is too short and therefore this patch increases it again to a safer limit that will be reduced in the future. Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Fixes: d2f372ba0914 ("mlxsw: pci: Increase PCI SW reset timeout") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index ffee38e36ce89..8648ca1712543 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF -- GitLab From 151f0dddbbfe4c35c9c5b64873115aafd436af9d Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 18 Apr 2019 07:14:16 +0000 Subject: [PATCH 1333/1861] mlxsw: spectrum: Fix autoneg status in ethtool If link is down and autoneg is set to on/off, the status in ethtool does not change. The reason is when the link is down the function returns with zero before changing autoneg value. Move the checking of link state (up/down) to be performed after setting autoneg value, in order to be sure that autoneg will change in any case. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3745ea194632c..6b8aa3761899b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3126,11 +3126,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, if (err) return err; + mlxsw_sp_port->link.autoneg = autoneg; + if (!netif_running(dev)) return 0; - mlxsw_sp_port->link.autoneg = autoneg; - mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); -- GitLab From d5f6db353829fe3867bbf9cd73fd8d631e2346f1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Apr 2019 11:39:18 +0100 Subject: [PATCH 1334/1861] net: ipv6: addrlabel: fix spelling mistake "requewst" -> "request" There is a spelling mistake in a NL_SET_ERR_MSG_MOD error message, fix it. Signed-off-by: Colin Ian King Reviewed-by: Mukesh Ojha Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d43d076c98f5d..1766325423b5d 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, } if (nlmsg_attrlen(nlh, sizeof(*ifal))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst"); + NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request"); return -EINVAL; } -- GitLab From b1546edcf2aab710a5afc98d65c948a4bfac0353 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 18 Apr 2019 22:47:13 +0800 Subject: [PATCH 1335/1861] sched/core: Make some functions static Fix these sparse warnings: kernel/sched/core.c:6577:11: warning: symbol 'min_cfs_quota_period' was not declared. Should it be static? kernel/sched/core.c:6657:5: warning: symbol 'tg_set_cfs_quota' was not declared. Should it be static? kernel/sched/core.c:6670:6: warning: symbol 'tg_get_cfs_quota' was not declared. 
Should it be static? kernel/sched/core.c:6683:5: warning: symbol 'tg_set_cfs_period' was not declared. Should it be static? kernel/sched/core.c:6693:6: warning: symbol 'tg_get_cfs_period' was not declared. Should it be static? kernel/sched/fair.c:2596:6: warning: symbol 'task_tick_numa' was not declared. Should it be static? Signed-off-by: YueHaibing Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190418144713.34332-1-yuehaibing@huawei.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 10 +++++----- kernel/sched/fair.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f4838b78b9f90..8b64ef0d5589e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6522,7 +6522,7 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, static DEFINE_MUTEX(cfs_constraints_mutex); const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ -const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ +static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); @@ -6602,7 +6602,7 @@ out_unlock: return ret; } -int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) +static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) { u64 quota, period; @@ -6615,7 +6615,7 @@ int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) return tg_set_cfs_bandwidth(tg, period, quota); } -long tg_get_cfs_quota(struct task_group *tg) +static long tg_get_cfs_quota(struct task_group *tg) { u64 quota_us; @@ -6628,7 +6628,7 @@ long tg_get_cfs_quota(struct task_group *tg) return quota_us; } -int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) +static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) { u64 quota, period; @@ -6638,7 +6638,7 @@ int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) return tg_set_cfs_bandwidth(tg, period, quota); } -long tg_get_cfs_period(struct task_group *tg) +static long tg_get_cfs_period(struct task_group *tg) { u64 cfs_period_us; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e5b100b6ba4ea..13bafe350abf9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2593,7 +2593,7 @@ out: /* * Drive the periodic memory faults.. */ -void task_tick_numa(struct rq *rq, struct task_struct *curr) +static void task_tick_numa(struct rq *rq, struct task_struct *curr) { struct callback_head *work = &curr->numa_work; u64 period, now; -- GitLab From a7cf2cc3cd3622eae9d5619cdde56b4462398c7f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 Apr 2019 18:03:50 +0100 Subject: [PATCH 1336/1861] firestream: fix spelling mistake "tramsitted" -> "transmitted" There is a spelling mistake in a debug message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. 
Miller --- drivers/atm/firestream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 11e1663bdc4de..b2c06da4f62e3 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1646,7 +1646,7 @@ static irqreturn_t fs_irq (int irq, void *dev_id) } if (status & ISR_TBRQ_W) { - fs_dprintk (FS_DEBUG_IRQ, "Data tramsitted!\n"); + fs_dprintk (FS_DEBUG_IRQ, "Data transmitted!\n"); process_txdone_queue (dev, &dev->tx_relq); } -- GitLab From e0c1d14a1a3211dccf0540a6703ffbd5d2a75bdb Mon Sep 17 00:00:00 2001 From: Su Bao Cheng Date: Thu, 18 Apr 2019 11:14:56 +0200 Subject: [PATCH 1337/1861] stmmac: pci: Adjust IOT2000 matching Since there are more IOT2040 variants with identical hardware but different asset tags, the asset tag matching should be adjusted to support them. For the board name "SIMATIC IOT2000", currently there are 2 types of hardware, IOT2020 and IOT2040. The IOT2020 is identified by its unique asset tag. Match on it first. If we then match on the board name only, we will catch all IOT2040 variants. In the future there will be no other devices with the "SIMATIC IOT2000" DMI board name but different hardware. Signed-off-by: Su Bao Cheng Reviewed-by: Jan Kiszka Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index d819e8eaba122..cc1e887e47b50 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -159,6 +159,12 @@ static const struct dmi_system_id quark_pci_dmi[] = { }, .driver_data = (void *)&galileo_stmmac_dmi_data, }, + /* + * There are 2 types of SIMATIC IOT2000: IOT20202 and IOT2040. + * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which + * has only one pci network device while other asset tags are + * for IOT2040 which has two. + */ { .matches = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"), @@ -170,8 +176,6 @@ static const struct dmi_system_id quark_pci_dmi[] = { { .matches = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"), - DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG, - "6ES7647-0AA00-1YA2"), }, .driver_data = (void *)&iot2040_stmmac_dmi_data, }, -- GitLab From 5a7efdacb9dda1c35fb414d57b3e16d520e2c0db Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 12 Apr 2019 11:31:42 -0700 Subject: [PATCH 1338/1861] clkdev: Hold clocks_mutex while iterating clocks list We recently introduced a change to support devm clk lookups. That change introduced a code-path that used clk_find() without holding the 'clocks_mutex'. Unfortunately, clk_find() iterates over the 'clocks' list and so we need to prevent the list from being modified at the same time. Do this by holding the mutex and checking to make sure it's held while iterating the list. Note, we don't really care if the lookup is freed after we find it with clk_find() because we're just doing a pointer comparison, but if we did care we would need to keep holding the mutex while we dereference the clk_lookup pointer. 
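The locking rule this commit enforces is a common kernel pattern: any walk of a mutex-protected list must run under that mutex, and the list-walking helper can document the requirement with lockdep_assert_held() so that unlocked callers are caught at runtime. A condensed sketch with simplified types, not the verbatim clkdev code:

static DEFINE_MUTEX(clocks_mutex);
static LIST_HEAD(clocks);

struct clk_lookup {				/* simplified for illustration */
	struct list_head node;
	const char *dev_id;
};

static struct clk_lookup *clk_find_locked(const char *dev_id)
{
	struct clk_lookup *p;

	lockdep_assert_held(&clocks_mutex);	/* complain if the caller forgot the lock */
	list_for_each_entry(p, &clocks, node)
		if (p->dev_id && !strcmp(p->dev_id, dev_id))
			return p;
	return NULL;
}

static struct clk_lookup *lookup_example(const char *dev_id)
{
	struct clk_lookup *cl;

	mutex_lock(&clocks_mutex);		/* list cannot change while we walk it */
	cl = clk_find_locked(dev_id);
	mutex_unlock(&clocks_mutex);
	return cl;
}

As the commit message notes, dropping the mutex before using the result is only safe here because the caller merely compares pointers.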
Fixes: 3eee6c7d119c ("clkdev: add managed clkdev lookup registration") Cc: Miquel Raynal Cc: Jerome Brunet Cc: Russell King Cc: Michael Turquette Cc: Jeffrey Hugo Cc: Chen-Yu Tsai Cc: Matti Vaittinen Acked-by: Matti Vaittinen Tested-by: Jeffrey Hugo Signed-off-by: Stephen Boyd --- drivers/clk/clkdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 8c4435c53f09c..6e787cc9e5b90 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -46,6 +46,8 @@ static struct clk_lookup *clk_find(const char *dev_id, const char *con_id) if (con_id) best_possible += 1; + lockdep_assert_held(&clocks_mutex); + list_for_each_entry(p, &clocks, node) { match = 0; if (p->dev_id) { @@ -402,7 +404,10 @@ void devm_clk_release_clkdev(struct device *dev, const char *con_id, struct clk_lookup *cl; int rval; + mutex_lock(&clocks_mutex); cl = clk_find(dev_id, con_id); + mutex_unlock(&clocks_mutex); + WARN_ON(!cl); rval = devres_release(dev, devm_clkdev_release, devm_clk_match_clkdev, cl); -- GitLab From 9188d5ca454fd665145904267e726e9e8d122f5c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Apr 2019 10:51:19 -0700 Subject: [PATCH 1339/1861] net/tls: fix refcount adjustment in fallback Unlike atomic_add(), refcount_add() does not deal well with a negative argument. TLS fallback code reallocates the skb and is very likely to shrink the truesize, leading to: [ 189.513254] WARNING: CPU: 5 PID: 0 at lib/refcount.c:81 refcount_add_not_zero_checked+0x15c/0x180 Call Trace: refcount_add_checked+0x6/0x40 tls_enc_skb+0xb93/0x13e0 [tls] Once wmem_allocated count saturates the application can no longer send data on the socket. This is similar to Eric's fixes for GSO, TCP: commit 7ec318feeed1 ("tcp: gso: avoid refcount_t warning from tcp_gso_segment()") and UDP: commit 575b65bc5bff ("udp: avoid refcount_t saturation in __udp_gso_segment()"). Unlike the GSO case, for TLS fallback it's likely that the skb has shrunk, so the "likely" annotation is the other way around (likely branch being "sub"). Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. 
Miller --- net/tls/tls_device_fallback.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 54c3a758f2a7d..a3ebd4b02714c 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -194,6 +194,9 @@ static void update_chksum(struct sk_buff *skb, int headln) static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) { + struct sock *sk = skb->sk; + int delta; + skb_copy_header(nskb, skb); skb_put(nskb, skb->len); @@ -201,11 +204,15 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) update_chksum(nskb, headln); nskb->destructor = skb->destructor; - nskb->sk = skb->sk; + nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; - refcount_add(nskb->truesize - skb->truesize, - &nskb->sk->sk_wmem_alloc); + + delta = nskb->truesize - skb->truesize; + if (likely(delta < 0)) + WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); + else if (delta) + refcount_add(delta, &sk->sk_wmem_alloc); } /* This function may be called after the user socket is already -- GitLab From 0228034d8e5915b98c33db35a98f5e909e848ae9 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Thu, 18 Apr 2019 03:40:12 -0700 Subject: [PATCH 1340/1861] Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO" The reverted patch cleared the FC_RP_STARTED flag during logoff; because of this, re-login (FLOGI) did not happen to the switch. This reverts commit 1550ec458e0cf1a40a170ab1f4c46e3f52860f65. Fixes: 1550ec458e0c ("scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO") Cc: # v4.18+ Signed-off-by: Saurav Kashyap Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_rport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index dfba4921b265a..5bf61431434be 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -2162,7 +2162,6 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp) FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", fc_rport_state(rdata)); - rdata->flags &= ~FC_RP_STARTED; fc_rport_enter_delete(rdata, RPORT_EV_STOP); mutex_unlock(&rdata->rp_mutex); kref_put(&rdata->kref, fc_rport_destroy); -- GitLab From 144ec97493af34efdb77c5aba146e9c7de8d0a06 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Apr 2019 18:13:58 +0200 Subject: [PATCH 1341/1861] scsi: aic7xxx: fix EISA support Instead of relying on the now removed NULL argument to pci_alloc_consistent, switch to the generic DMA API, and store the struct device so that we can pass it. Fixes: 4167b2ad5182 ("PCI: Remove NULL device handling from PCI DMA API") Reported-by: Matthew Whitehead Signed-off-by: Christoph Hellwig Tested-by: Matthew Whitehead Signed-off-by: Martin K.
Petersen --- drivers/scsi/aic7xxx/aic7770_osm.c | 1 + drivers/scsi/aic7xxx/aic7xxx.h | 1 + drivers/scsi/aic7xxx/aic7xxx_osm.c | 10 ++++------ drivers/scsi/aic7xxx/aic7xxx_osm_pci.c | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c index 3d401d02c0195..bdd177e3d7622 100644 --- a/drivers/scsi/aic7xxx/aic7770_osm.c +++ b/drivers/scsi/aic7xxx/aic7770_osm.c @@ -91,6 +91,7 @@ aic7770_probe(struct device *dev) ahc = ahc_alloc(&aic7xxx_driver_template, name); if (ahc == NULL) return (ENOMEM); + ahc->dev = dev; error = aic7770_config(ahc, aic7770_ident_table + edev->id.driver_data, eisaBase); if (error != 0) { diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h index 5614921b4041a..88b90f9806c99 100644 --- a/drivers/scsi/aic7xxx/aic7xxx.h +++ b/drivers/scsi/aic7xxx/aic7xxx.h @@ -943,6 +943,7 @@ struct ahc_softc { * Platform specific device information. */ ahc_dev_softc_t dev_softc; + struct device *dev; /* * Bus specific device information. diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index 3c9c17450bb39..d5c4a0d237062 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -860,8 +860,8 @@ int ahc_dmamem_alloc(struct ahc_softc *ahc, bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { - *vaddr = pci_alloc_consistent(ahc->dev_softc, - dmat->maxsize, mapp); + /* XXX: check if we really need the GFP_ATOMIC and unwind this mess! */ + *vaddr = dma_alloc_coherent(ahc->dev, dmat->maxsize, mapp, GFP_ATOMIC); if (*vaddr == NULL) return ENOMEM; return 0; @@ -871,8 +871,7 @@ void ahc_dmamem_free(struct ahc_softc *ahc, bus_dma_tag_t dmat, void* vaddr, bus_dmamap_t map) { - pci_free_consistent(ahc->dev_softc, dmat->maxsize, - vaddr, map); + dma_free_coherent(ahc->dev, dmat->maxsize, vaddr, map); } int @@ -1123,8 +1122,7 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa host->transportt = ahc_linux_transport_template; - retval = scsi_add_host(host, - (ahc->dev_softc ? &ahc->dev_softc->dev : NULL)); + retval = scsi_add_host(host, ahc->dev); if (retval) { printk(KERN_WARNING "aic7xxx: scsi_add_host failed\n"); scsi_host_put(host); diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c index 0fc14dac7070c..717d8d1082ce1 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c @@ -250,6 +250,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } ahc->dev_softc = pci; + ahc->dev = &pci->dev; error = ahc_pci_config(ahc, entry); if (error != 0) { ahc_free(ahc); -- GitLab From ef7c7727f2c18338fa77b339f10ab923a4c0b0f4 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 26 Feb 2019 01:19:59 -0500 Subject: [PATCH 1342/1861] x86/topology: Fix documentation typo Syntax only, no functional or semantic change. reflect actual cpuinfo_x86 field name: s/logical_id/logical_proc_id/ Signed-off-by: Len Brown Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/e2810a5317d3a109a98204e883fd1461f77b9339.1551160674.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- Documentation/x86/topology.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt index 2953e3ec9a025..06b3cdbc40489 100644 --- a/Documentation/x86/topology.txt +++ b/Documentation/x86/topology.txt @@ -51,7 +51,7 @@ The topology of a system is described in the units of: The physical ID of the package. This information is retrieved via CPUID and deduced from the APIC IDs of the cores in the package. - - cpuinfo_x86.logical_id: + - cpuinfo_x86.logical_proc_id: The logical ID of the package. As we do not trust BIOSes to enumerate the packages in a consistent way, we introduced the concept of logical package -- GitLab From 3a1c779fb8f71e772e2145e68c262936ada815ed Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 26 Feb 2019 01:20:00 -0500 Subject: [PATCH 1343/1861] topology: Simplify cputopology.txt formatting and wording Syntax only, no functional or semantic change. Signed-off-by: Len Brown Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1ca56f8ea922a67f0017bd645912ea02a65a85ec.1551160674.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- Documentation/cputopology.txt | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index c6e7e9196a8b4..cb61277e23082 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -3,79 +3,79 @@ How CPU topology info is exported via sysfs =========================================== Export CPU topology info via sysfs. Items (attributes) are similar -to /proc/cpuinfo output of some architectures: +to /proc/cpuinfo output of some architectures. They reside in +/sys/devices/system/cpu/cpuX/topology/: -1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: +physical_package_id: physical package id of cpuX. Typically corresponds to a physical socket number, but the actual value is architecture and platform dependent. -2) /sys/devices/system/cpu/cpuX/topology/core_id: +core_id: the CPU core ID of cpuX. Typically it is the hardware platform's identifier (rather than the kernel's). The actual value is architecture and platform dependent. -3) /sys/devices/system/cpu/cpuX/topology/book_id: +book_id: the book ID of cpuX. Typically it is the hardware platform's identifier (rather than the kernel's). The actual value is architecture and platform dependent. -4) /sys/devices/system/cpu/cpuX/topology/drawer_id: +drawer_id: the drawer ID of cpuX. Typically it is the hardware platform's identifier (rather than the kernel's). The actual value is architecture and platform dependent. -5) /sys/devices/system/cpu/cpuX/topology/thread_siblings: +thread_siblings: internal kernel map of cpuX's hardware threads within the same core as cpuX. -6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list: +thread_siblings_list: human-readable list of cpuX's hardware threads within the same core as cpuX. -7) /sys/devices/system/cpu/cpuX/topology/core_siblings: +core_siblings: internal kernel map of cpuX's hardware threads within the same physical_package_id. 
-8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list: +core_siblings_list: human-readable list of cpuX's hardware threads within the same physical_package_id. -9) /sys/devices/system/cpu/cpuX/topology/book_siblings: +book_siblings: internal kernel map of cpuX's hardware threads within the same book_id. -10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list: +book_siblings_list: human-readable list of cpuX's hardware threads within the same book_id. -11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings: +drawer_siblings: internal kernel map of cpuX's hardware threads within the same drawer_id. -12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list: +drawer_siblings_list: human-readable list of cpuX's hardware threads within the same drawer_id. -To implement it in an architecture-neutral way, a new source file, -drivers/base/topology.c, is to export the 6 to 12 attributes. The book -and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK -and CONFIG_SCHED_DRAWER are selected. +Architecture-neutral, drivers/base/topology.c, exports these attributes. +However, the book and drawer related sysfs files will only be created if +CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively. -CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where -they reflect the cpu and cache hierarchy. +CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390, +where they reflect the cpu and cache hierarchy. For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h:: @@ -98,10 +98,10 @@ To be consistent on all architectures, include/linux/topology.h provides default definitions for any of the above macros that are not defined by include/asm-XXX/topology.h: -1) physical_package_id: -1 -2) core_id: 0 -3) sibling_cpumask: just the given CPU -4) core_cpumask: just the given CPU +1) topology_physical_package_id: -1 +2) topology_core_id: 0 +3) topology_sibling_cpumask: just the given CPU +4) topology_core_cpumask: just the given CPU For architectures that don't support books (CONFIG_SCHED_BOOK) there are no default definitions for topology_book_id() and topology_book_cpumask(). -- GitLab From 169d0869962da362b5058e31f87911b2960418af Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 26 Feb 2019 01:20:01 -0500 Subject: [PATCH 1344/1861] x86/smpboot: Rename match_die() to match_pkg() Syntax only, no functional or semantic change. This routine matches packages, not die, so name it thus. Signed-off-by: Len Brown Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/7ca18c4ae7816a1f9eda37414725df676e63589d.1551160674.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ce1a67b70168e..3f8bbfb26c186 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -455,7 +455,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) * multicore group inside a NUMA node. If this happens, we will * discard the MC level of the topology later. 
*/ -static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { if (c->phys_proc_id == o->phys_proc_id) return true; @@ -546,7 +546,7 @@ void set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); - if ((i == cpu) || (has_mp && match_die(c, o))) { + if ((i == cpu) || (has_mp && match_pkg(c, o))) { link_mask(topology_core_cpumask, cpu, i); /* @@ -570,7 +570,7 @@ void set_cpu_sibling_map(int cpu) } else if (i != cpu && !c->booted_cores) c->booted_cores = cpu_data(i).booted_cores; } - if (match_die(c, o) && !topology_same_node(c, o)) + if (match_pkg(c, o) && !topology_same_node(c, o)) x86_has_numa_in_package = true; } -- GitLab From bee9853932e90ce94bce4276ec6b7b06bc48070b Mon Sep 17 00:00:00 2001 From: Joel Savitz Date: Wed, 6 Mar 2019 20:13:33 -0500 Subject: [PATCH 1345/1861] sched/core: Fix typo in comment Signed-off-by: Joel Savitz Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1551921213-813-1-git-send-email-jsavitz@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8b64ef0d5589e..fb09eaad1d3a5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -924,7 +924,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) } /* - * Per-CPU kthreads are allowed to run on !actie && online CPUs, see + * Per-CPU kthreads are allowed to run on !active && online CPUs, see * __set_cpus_allowed_ptr() and select_fallback_rq(). */ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -- GitLab From 93ddedaa5c9cb828d39a19b5d6e7e1939393085a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Wed, 6 Mar 2019 01:44:25 +0100 Subject: [PATCH 1346/1861] x86/defconfig: Remove archaic partition tables support On x86 systems, only MSDOS and GPT partition tables are typically encountered. Remove all the rest. Note, CONFIG_EFI_PARTITION is also removed since it defaults to `y'. Signed-off-by: Ahmed S. 
Darwish Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190306004425.GA30537@darwi-home-pc Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 12 ------------ arch/x86/configs/x86_64_defconfig | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 9f908112bbb97..2b2481acc6615 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -25,18 +25,6 @@ CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 1d3badfda09ee..e8829abf063ac 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -24,18 +24,6 @@ CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 -- GitLab From bff9504bfc9c5c6610b42d47f689f350fd969eb8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 5 Mar 2019 14:47:53 -0500 Subject: [PATCH 1347/1861] rseq: Clean up comments by reflecting removal of event counter The "event counter" was removed from rseq before it was merged upstream. However, a few comments in the source code still refer to it. Adapt the comments to match reality. Signed-off-by: Mathieu Desnoyers Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ben Maurer Cc: Boqun Feng Cc: Catalin Marinas Cc: Chris Lameter Cc: Dave Watson Cc: H. Peter Anvin Cc: Joel Fernandes Cc: Josh Triplett Cc: Linus Torvalds Cc: Michael Kerrisk Cc: Paul E. McKenney Cc: Paul Turner Cc: Peter Zijlstra Cc: Russell King Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/20190305194755.2602-2-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- arch/arm/kernel/signal.c | 3 +-- arch/x86/kernel/signal.c | 5 +---- kernel/rseq.c | 3 +-- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 76bb8de6bf6b6..be5edfdde558d 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -549,8 +549,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) int ret; /* - * Increment event counter and perform fixup for the pre-signal - * frame. + * Perform fixup for the pre-signal frame. 
*/ rseq_signal_deliver(ksig, regs); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 08dfd4c1a4f95..22c233b509da1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -688,10 +688,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) sigset_t *set = sigmask_to_save(); compat_sigset_t *cset = (compat_sigset_t *) set; - /* - * Increment event counter and perform fixup for the pre-signal - * frame. - */ + /* Perform fixup for the pre-signal frame. */ rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ diff --git a/kernel/rseq.c b/kernel/rseq.c index 25e9a7b60eba4..849afe7491317 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -254,8 +254,7 @@ static int rseq_ip_fixup(struct pt_regs *regs) * - signal delivery, * and return to user-space. * - * This is how we can ensure that the entire rseq critical section, - * consisting of both the C part and the assembly instruction sequence, + * This is how we can ensure that the entire rseq critical section * will issue the commit instruction only if executed atomically with * respect to other threads scheduled on the same CPU, and with respect * to signal handlers. -- GitLab From 83b0b15bcb0f700e7c1d070aae2e7841170a4c33 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 5 Mar 2019 14:47:54 -0500 Subject: [PATCH 1348/1861] rseq: Remove superfluous rseq_len from task_struct The rseq system call, when invoked with flags of "0" or "RSEQ_FLAG_UNREGISTER" values, expects the rseq_len parameter to be equal to sizeof(struct rseq), which is fixed-size and fixed-layout, specified in uapi linux/rseq.h. Expecting a fixed size for rseq_len is a design choice that ensures multiple libraries and application defining __rseq_abi in the same process agree on its exact size. Considering that this size is and will always be the same value, there is no point in saving this value within task_struct rseq_len. Remove this field from task_struct. No change in functionality intended. Signed-off-by: Mathieu Desnoyers Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ben Maurer Cc: Boqun Feng Cc: Catalin Marinas Cc: Chris Lameter Cc: Dave Watson Cc: H. Peter Anvin Cc: Joel Fernandes Cc: Josh Triplett Cc: Linus Torvalds Cc: Michael Kerrisk Cc: Paul E. 
McKenney Cc: Paul Turner Cc: Peter Zijlstra Cc: Russell King Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/20190305194755.2602-3-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ---- kernel/rseq.c | 6 ++---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1549584a15388..50606a6e73d68 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1057,7 +1057,6 @@ struct task_struct { #ifdef CONFIG_RSEQ struct rseq __user *rseq; - u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically @@ -1855,12 +1854,10 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { if (clone_flags & CLONE_THREAD) { t->rseq = NULL; - t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } else { t->rseq = current->rseq; - t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; } @@ -1869,7 +1866,6 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) static inline void rseq_execve(struct task_struct *t) { t->rseq = NULL; - t->rseq_len = 0; t->rseq_sig = 0; t->rseq_event_mask = 0; } diff --git a/kernel/rseq.c b/kernel/rseq.c index 849afe7491317..9424ee90589ef 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -313,7 +313,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, /* Unregister rseq for current thread. */ if (current->rseq != rseq || !current->rseq) return -EINVAL; - if (current->rseq_len != rseq_len) + if (rseq_len != sizeof(*rseq)) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; @@ -321,7 +321,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, if (ret) return ret; current->rseq = NULL; - current->rseq_len = 0; current->rseq_sig = 0; return 0; } @@ -335,7 +334,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, * the provided address differs from the prior * one. */ - if (current->rseq != rseq || current->rseq_len != rseq_len) + if (current->rseq != rseq || rseq_len != sizeof(*rseq)) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; @@ -353,7 +352,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, if (!access_ok(rseq, rseq_len)) return -EFAULT; current->rseq = rseq; - current->rseq_len = rseq_len; current->rseq_sig = sig; /* * If rseq was previously inactive, and has just been -- GitLab From f6c6010a07734103a31faa0cc977641b358c45b0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 5 Mar 2019 16:34:32 +0800 Subject: [PATCH 1349/1861] mm/resource: Use resource_overlaps() to simplify region_intersects() The three checks in region_intersects() are basically an open-coded version of resource_overlaps() - so use the real thing. Also fix typos in comments while at it. Signed-off-by: Wei Yang Reviewed-by: Like Xu Reviewed-by: Yuan Yao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: bhelgaas@google.com Cc: bp@suse.de Cc: dan.j.williams@intel.com Cc: jack@suse.cz Cc: rdunlap@infradead.org Cc: tiwai@suse.de Link: http://lkml.kernel.org/r/20190305083432.23675-1-richardw.yang@linux.intel.com [ Rewrote the changelog. 
] Signed-off-by: Ingo Molnar --- kernel/iomem.c | 4 ++-- kernel/resource.c | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/iomem.c b/kernel/iomem.c index f7525e14ebc6f..93c2644445101 100644 --- a/kernel/iomem.c +++ b/kernel/iomem.c @@ -55,7 +55,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size, * * MEMREMAP_WB - matches the default mapping for System RAM on * the architecture. This is usually a read-allocate write-back cache. - * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM + * Moreover, if MEMREMAP_WB is specified and the requested remap region is RAM * memremap() will bypass establishing a new mapping and instead return * a pointer into the direct map. * @@ -86,7 +86,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) /* Try all mapping types requested until one returns non-NULL */ if (flags & MEMREMAP_WB) { /* - * MEMREMAP_WB is special in that it can be satisifed + * MEMREMAP_WB is special in that it can be satisfied * from the direct map. Some archs depend on the * capability of memremap() to autodetect cases where * the requested range is potentially in System RAM. diff --git a/kernel/resource.c b/kernel/resource.c index 92190f62ebc53..8c15f846e8ef2 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -520,21 +520,20 @@ EXPORT_SYMBOL_GPL(page_is_ram); int region_intersects(resource_size_t start, size_t size, unsigned long flags, unsigned long desc) { - resource_size_t end = start + size - 1; + struct resource res; int type = 0; int other = 0; struct resource *p; + res.start = start; + res.end = start + size - 1; + read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { bool is_type = (((p->flags & flags) == flags) && ((desc == IORES_DESC_NONE) || (desc == p->desc))); - if (start >= p->start && start <= p->end) - is_type ? type++ : other++; - if (end >= p->start && end <= p->end) - is_type ? type++ : other++; - if (p->start >= start && p->end <= end) + if (resource_overlaps(p, &res)) is_type ? type++ : other++; } read_unlock(&resource_lock); -- GitLab From 1a010e29cfa00fee2888fd2fd4983f848cbafb58 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 27 Feb 2019 11:10:17 +0300 Subject: [PATCH 1350/1861] sched/rt: Check integer overflow at usec to nsec conversion Example of unhandled overflows: # echo 18446744073709651 > cpu.rt_runtime_us # cat cpu.rt_runtime_us 99 # echo 18446744073709900 > cpu.rt_period_us # cat cpu.rt_period_us 348 After this patch they will fail with -EINVAL. 
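The guard added here is the standard check-before-multiply idiom: a u64 multiplication a * b wraps exactly when a > U64_MAX / b, so the comparison must happen before the multiply. A standalone userspace demonstration of the idiom (illustrative only, not the kernel code):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

static int usecs_to_nsecs(uint64_t us, uint64_t *ns)
{
	if (us > UINT64_MAX / NSEC_PER_USEC)
		return -1;		/* would wrap; mirrors the kernel's -EINVAL */
	*ns = us * NSEC_PER_USEC;
	return 0;
}

int main(void)
{
	uint64_t ns;

	/* 18446744073709651 * 1000 exceeds 2^64; the wrapped product is how
	 * the bogus "99" in the example above was produced before the fix */
	printf("%d\n", usecs_to_nsecs(18446744073709651ULL, &ns));
	return 0;
}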
Signed-off-by: Konstantin Khlebnikov Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/155125501739.293431.5252197504404771496.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 90fa23d36565d..1e6b909dca367 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2555,6 +2555,8 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; if (rt_runtime_us < 0) rt_runtime = RUNTIME_INF; + else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC) + return -EINVAL; return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } @@ -2575,6 +2577,9 @@ int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us) { u64 rt_runtime, rt_period; + if (rt_period_us > U64_MAX / NSEC_PER_USEC) + return -EINVAL; + rt_period = rt_period_us * NSEC_PER_USEC; rt_runtime = tg->rt_bandwidth.rt_runtime; -- GitLab From 5b61d50ab4ef590f5e1d4df15cd2cea5f5715308 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 27 Feb 2019 11:10:18 +0300 Subject: [PATCH 1351/1861] sched/core: Handle overflow in cpu_shares_write_u64 Bit shift in scale_load() could overflow shares. This patch saturates it to MAX_SHARES, as sched_group_set_shares() does. Example: # echo 9223372036854776832 > cpu.shares # cat cpu.shares Before patch: 1024 After patch: 262144 Signed-off-by: Konstantin Khlebnikov Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/155125501891.293431.3345233332801109696.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fb09eaad1d3a5..685b1541ce517 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6507,6 +6507,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) static int cpu_shares_write_u64(struct cgroup_subsys_state *css, struct cftype *cftype, u64 shareval) { + if (shareval > scale_load_down(ULONG_MAX)) + shareval = MAX_SHARES; return sched_group_set_shares(css_tg(css), scale_load(shareval)); } -- GitLab From 1a8b4540db732ca16c9e43ac7c08b1b8f0b252d8 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 27 Feb 2019 11:10:20 +0300 Subject: [PATCH 1352/1861] sched/core: Check quota and period overflow at usec to nsec conversion Large values could overflow u64 and pass the following sanity checks. # echo 18446744073750000 > cpu.cfs_period_us # cat cpu.cfs_period_us 40448 # echo 18446744073750000 > cpu.cfs_quota_us # cat cpu.cfs_quota_us 40448 After this patch they will fail with -EINVAL.
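Note that these two sibling fixes apply different policies: cpu.shares saturates an overflowing value so the write still succeeds, while the CFS quota and period paths (like the RT paths in the previous patch) reject overflow with -EINVAL. A hedged sketch of the two idioms, with the kernel's scale_load() shift and MAX_SHARES inlined as illustrative constants:

#include <stdint.h>
#include <errno.h>

#define SHARE_SHIFT	10			/* stand-in for scale_load() */
#define MAX_SHARES	(1ULL << 18)

static uint64_t shares_write(uint64_t shareval)
{
	if (shareval > (UINT64_MAX >> SHARE_SHIFT))	/* scale_load_down(ULONG_MAX) */
		shareval = MAX_SHARES;		/* saturate; the write succeeds */
	return shareval << SHARE_SHIFT;
}

static int period_write(uint64_t period_us, uint64_t *period_ns)
{
	if (period_us > UINT64_MAX / 1000)	/* U64_MAX / NSEC_PER_USEC */
		return -EINVAL;			/* reject; the write fails */
	*period_ns = period_us * 1000;
	return 0;
}

Saturation suits a tuning knob where any huge value means "as much as possible", while rejection suits quantities that feed later arithmetic, where a silently clamped value could change semantics.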
Signed-off-by: Konstantin Khlebnikov Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/155125502079.293431.3947497929372138600.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 685b1541ce517..de8ab411826ce 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6611,8 +6611,10 @@ static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) period = ktime_to_ns(tg->cfs_bandwidth.period); if (cfs_quota_us < 0) quota = RUNTIME_INF; - else + else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC) quota = (u64)cfs_quota_us * NSEC_PER_USEC; + else + return -EINVAL; return tg_set_cfs_bandwidth(tg, period, quota); } @@ -6634,6 +6636,9 @@ static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) { u64 quota, period; + if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC) + return -EINVAL; + period = (u64)cfs_period_us * NSEC_PER_USEC; quota = tg->cfs_bandwidth.quota;
-- GitLab From c53051128bb0e8754e13345d782ca69e5e1ce36d Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 18 Apr 2019 10:01:55 +0800 Subject: [PATCH 1353/1861] sc16is7xx: put err_spi and err_i2c into correct #ifdef err_spi is only used within SERIAL_SC16IS7XX_SPI, while err_i2c is used inside SERIAL_SC16IS7XX_I2C. So we need to put err_spi and err_i2c into each #ifdef accordingly. This change fixes ("sc16is7xx: move label 'err_spi' to correct section"). Signed-off-by: Guoqing Jiang Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 22381a8c72e43..a31db15cd7c0d 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1522,11 +1522,11 @@ static int __init sc16is7xx_init(void) #ifdef CONFIG_SERIAL_SC16IS7XX_SPI err_spi: +#endif #ifdef CONFIG_SERIAL_SC16IS7XX_I2C i2c_del_driver(&sc16is7xx_i2c_uart_driver); -#endif -#endif err_i2c: +#endif uart_unregister_driver(&sc16is7xx_uart); return ret; }
-- GitLab From fc834e607ae3d18e1a20bca3f9a2d7f52ea7a2be Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 18 Apr 2019 13:12:07 -0400 Subject: [PATCH 1354/1861] USB: dummy-hcd: Fix failure to give back unlinked URBs The syzkaller USB fuzzer identified a failure mode in which dummy-hcd would never give back an unlinked URB. This causes usb_kill_urb() to hang, leading to WARNINGs and unkillable threads. In dummy-hcd, all URBs are given back by the dummy_timer() routine as it scans through the list of pending URBs. Failure to give back URBs can be caused by failure to start or early exit from the scanning loop. The code currently has two such pathways: One is triggered when an unsupported bus transfer speed is encountered, and the other by exhausting the simulated bandwidth for USB transfers during a frame. This patch removes those two paths, thereby allowing all unlinked URBs to be given back in a timely manner. It adds a check for the bus speed when the gadget first starts running, so that dummy_timer() will never thereafter encounter an unsupported speed. And it prevents the loop from exiting as soon as the total bandwidth has been used up (the scanning loop continues, giving back unlinked URBs as they are found, but not transferring any more data).
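The structural idea, in isolation: a scan loop that must visit every pending request even when it can no longer do useful work on most of them. A simplified sketch with hypothetical types and callbacks (not the driver's real ones):

#include <stdbool.h>

struct req {
    struct req *next;
    bool unlinked;  /* canceled by the submitter; must be given back */
    int bytes;
};

void give_back(struct req *r);  /* assumed callback */
void transfer(struct req *r);   /* assumed callback */

static void scan_frame(struct req *head, int budget)
{
    struct req *r;

    for (r = head; r; r = r->next) {
        if (r->unlinked) {
            give_back(r);   /* must happen regardless of budget */
            continue;
        }
        if (budget <= 0)
            continue;       /* was "break", which starved unlinked reqs */
        budget -= r->bytes;
        transfer(r);
    }
}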
Thanks to Andrey Konovalov for manually running the syzkaller fuzzer to help track down the source of the bug. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+d919b0f29d7b5a4994b9@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index baf72f95f0f1c..213b52508621e 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -979,8 +979,18 @@ static int dummy_udc_start(struct usb_gadget *g, struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; - if (driver->max_speed == USB_SPEED_UNKNOWN) + switch (g->speed) { + /* All the speeds we support */ + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", + driver->max_speed); return -EINVAL; + } /* * SLAVE side init ... the layer above hardware, which @@ -1784,9 +1794,10 @@ static void dummy_timer(struct timer_list *t) /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; - default: + default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); - return; + total = 0; + break; } /* FIXME if HZ != 1000 this will probably misbehave ... */ @@ -1828,7 +1839,7 @@ restart: /* Used up this frame's bandwidth? */ if (total <= 0) - break; + continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe);
-- GitLab From b50776ae011cfd26df3cc2b4af8b2dc3b683e553 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 13 Feb 2019 11:59:48 -0800 Subject: [PATCH 1355/1861] locking/atomics: Don't assume that scripts are executable patch(1) doesn't set the x bit on files. So if someone downloads and applies patch-4.21.xz, their kernel won't build. Fix that by executing /bin/sh. Signed-off-by: Andrew Morton Acked-by: Mark Rutland Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- scripts/atomic/gen-atomics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh index 27400b0cd732e..000dc6437893b 100644 --- a/scripts/atomic/gen-atomics.sh +++ b/scripts/atomic/gen-atomics.sh @@ -13,7 +13,7 @@ gen-atomic-long.sh asm-generic/atomic-long.h gen-atomic-fallback.sh linux/atomic-fallback.h EOF while read script header; do - ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} + /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header} HASH="$(sha1sum ${LINUXDIR}/include/${header})" HASH="${HASH%% *}" printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header}
-- GitLab From 3ff9c075cc767b3060bdac12da72fc94dd7da1b8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:49:52 +0900 Subject: [PATCH 1356/1861] x86/kprobes: Verify stack frame on kretprobe Verify the stack frame pointer in the kretprobe trampoline handler. If the stack frame pointer does not match, it skips the wrong entry and tries to find the correct one. This can happen if a user puts a kretprobe on a function that can be used in the path of an ftrace user-function call. Such functions should not be probed, so this adds a warning message that reports which function should be blacklisted.
Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/155094059185.6137.15527904013362842072.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 26 ++++++++++++++++++++++++++ include/linux/kprobes.h | 1 + 2 files changed, 27 insertions(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index a034cb808e7eb..18fbe9be2d683 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -569,6 +569,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) unsigned long *sara = stack_addr(regs); ri->ret_addr = (kprobe_opcode_t *) *sara; + ri->fp = sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; @@ -759,15 +760,21 @@ static __used void *trampoline_handler(struct pt_regs *regs) unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; + void *frame_pointer; + bool skipped = false; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; + /* On x86-64, we use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->gs = 0; + /* On x86-32, we use pt_regs->flags for return address holder. */ + frame_pointer = ®s->flags; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -789,8 +796,25 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + /* + * Return probes must be pushed on this hash list correct + * order (same as return order) so that it can be poped + * correctly. However, if we find it is pushed it incorrect + * order, this means we find a function which should not be + * probed, because the wrong order entry is pushed on the + * path of processing other kretprobe itself. + */ + if (ri->fp != frame_pointer) { + if (!skipped) + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); + skipped = true; + continue; + } orig_ret_address = (unsigned long)ri->ret_addr; + if (skipped) + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", + ri->rp->kp.addr); if (orig_ret_address != trampoline_address) /* @@ -808,6 +832,8 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + if (ri->fp != frame_pointer) + continue; orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 201f0f2683f25..9a897256e481f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -173,6 +173,7 @@ struct kretprobe_instance { struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; + void *fp; char data[0]; }; -- GitLab From fabe38ab6b2bd9418350284c63825f13b8a6abba Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:50:20 +0900 Subject: [PATCH 1357/1861] kprobes: Mark ftrace mcount handler functions nokprobe Mark ftrace mcount handler functions nokprobe since probing on these functions with kretprobe pushes return address incorrectly on kretprobe shadow stack. 
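The companion rule this patch applies: anything that can run between function entry and the kretprobe bookkeeping must itself be exempt from probing. The two annotations used below, in a minimal sketch with hypothetical function names:

#include <linux/kprobes.h>

/* helper that may or may not be inlined: keep kprobes off either way */
static nokprobe_inline void list_func_core(unsigned long ip)
{
    /* per-call work that must never be probed */
}

static void list_func(unsigned long ip)
{
    list_func_core(ip);
}
NOKPROBE_SYMBOL(list_func); /* covers the out-of-line symbol */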
Reported-by: Francis Deslauriers Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/155094062044.6137.6419622920568680640.stgit@devbox Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 26c8ca9bd06b6..b920358dd8f7f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -33,6 +33,7 @@ #include #include #include +#include <linux/kprobes.h> #include @@ -6246,7 +6247,7 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static inline void +static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { @@ -6306,11 +6307,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, { __ftrace_ops_list_func(ip, parent_ip, NULL, regs); } +NOKPROBE_SYMBOL(ftrace_ops_list_func); #else static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } +NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif /* @@ -6337,6 +6340,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_enable_notrace(); trace_clear_recursion(bit); } +NOKPROBE_SYMBOL(ftrace_ops_assist_func); /** * ftrace_ops_get_func - get the function a trampoline should call
-- GitLab From b191fa96ea6dc00d331dcc28c1f7db5e075693a0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 24 Feb 2019 01:50:49 +0900 Subject: [PATCH 1358/1861] x86/kprobes: Avoid kretprobe recursion bug Avoid the kretprobe recursion loop bug by setting a dummy kprobe in the current_kprobe per-CPU variable. This bug was introduced with the asm-coded trampoline code, since previously it used another kprobe for hooking the function return placeholder (which only has a nop) and the trampoline handler was called from that kprobe. This revives the old lost kprobe again. With this fix, we don't see the deadlock anymore. And you can see that all inner-called kretprobes are skipped. event_1 235 0 event_2 19375 19612 The 1st column is the recorded count and the 2nd is the missed count.
Above shows (event_1 rec) + (event_2 rec) ~= (event_2 missed) (some difference are here because the counter is racy) Reported-by: Andrea Righi Tested-by: Andrea Righi Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c9becf58d935 ("[PATCH] kretprobe: kretprobe-booster") Link: http://lkml.kernel.org/r/155094064889.6137.972160690963039.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 18fbe9be2d683..fed46ddb1eef2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -749,11 +749,16 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); +static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ static __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -763,6 +768,17 @@ static __used void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; + preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ @@ -838,10 +854,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); } recycle_rp_inst(ri, &empty_rp); @@ -857,6 +872,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); -- GitLab From 1de7edbb59c8f1b46071f66c5c97b8a59569eb51 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:43 -0700 Subject: [PATCH 1359/1861] x86/cpu/bugs: Use __initconst for 'const' init data Some of the recently added const tables use __initdata which causes section attribute conflicts. Use __initconst instead. 
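The distinction generalizes: __initdata places an object in the writable init section, so combining it with const hands the compiler two incompatible section attributes, while const init-time tables belong in __initconst. A minimal sketch:

#include <linux/init.h>

/* writable init-time state: __initdata is correct */
static int boot_mode __initdata;

/* const init-time table: must be __initconst, not __initdata */
static const char *const boot_options[] __initconst = {
    "auto", "off", "on",
};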
Fixes: fa1202ef2243 ("x86/speculation: Add command line control") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190330004743.29541-9-andi@firstfloor.org --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2da82eff0eb4f..b91b3bfa5cfbe 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -275,7 +275,7 @@ static const struct { const char *option; enum spectre_v2_user_cmd cmd; bool secure; -} v2_user_options[] __initdata = { +} v2_user_options[] __initconst = { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, @@ -419,7 +419,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] __initdata = { +} mitigation_options[] __initconst = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -658,7 +658,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] __initdata = { +} ssb_mitigation_options[] __initconst = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- GitLab From c03e27506a564ec7db1b179e7464835901f49751 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:35 -0700 Subject: [PATCH 1360/1861] x86/asm: Mark all top level asm statements as .text With gcc toplevel assembler statements that do not mark themselves as .text may end up in other sections. This causes LTO boot crashes because various assembler statements ended up in the middle of the initcall section. It's also a latent problem without LTO, although it's currently not known to cause any real problems. According to the gcc team it's expected behavior. Always mark all the top level assembler statements as text so that they switch to the right section. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190330004743.29541-1-andi@firstfloor.org --- arch/x86/kernel/cpu/amd.c | 3 ++- arch/x86/kernel/kprobes/core.c | 1 + arch/x86/lib/error-inject.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 01004bfb1a1bc..1bcb489e07e73 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -83,7 +83,8 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) */ extern __visible void vide(void); -__asm__(".globl vide\n" +__asm__(".text\n" + ".globl vide\n" ".type vide, @function\n" ".align 4\n" "vide: ret\n"); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index a034cb808e7eb..31ab91c9c4e94 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -715,6 +715,7 @@ NOKPROBE_SYMBOL(kprobe_int3_handler); * calls trampoline_handler() runs, which calls the kretprobe's handler. 
*/ asm( + ".text\n" ".global kretprobe_trampoline\n" ".type kretprobe_trampoline, @function\n" "kretprobe_trampoline:\n" diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 3cdf06128d13c..be5b5fb1598bd 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -6,6 +6,7 @@ asmlinkage void just_return_func(void); asm( + ".text\n" ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n"
-- GitLab From 26b31f46f036ad89de20cbbb732b76289411eddb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:36 -0700 Subject: [PATCH 1361/1861] x86/cpu/amd: Exclude 32bit only assembler from 64bit build The "vide" inline assembler is only needed on 32bit kernels for old 32bit only CPUs. Guard it with an #ifdef so it's not included in 64bit builds. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190330004743.29541-2-andi@firstfloor.org --- arch/x86/kernel/cpu/amd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1bcb489e07e73..fb6a64bd765fc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -82,12 +82,14 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) * performance at the same time.. */ +#ifdef CONFIG_X86_32 extern __visible void vide(void); __asm__(".text\n" ".globl vide\n" ".type vide, @function\n" ".align 4\n" "vide: ret\n"); +#endif static void init_amd_k5(struct cpuinfo_x86 *c) {
-- GitLab From 81423c37415fe45057d64196ae0ce8e17a9c7148 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:38 -0700 Subject: [PATCH 1362/1861] x86/timer: Don't inline __const_udelay() LTO will happily inline __const_udelay() everywhere it is used. Forcing it noinline saves ~44k text in a LTO build. 13999560 1740864 1499136 17239560 1070e08 vmlinux-with-udelay-inline 13954764 1736768 1499136 17190668 1064f0c vmlinux-wo-udelay-inline Even without LTO this function should never be inlined. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190330004743.29541-4-andi@firstfloor.org --- arch/x86/lib/delay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index f5b7f1b3b6d75..b7375dc6898f3 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -162,7 +162,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -void __const_udelay(unsigned long xloops) +noinline void __const_udelay(unsigned long xloops) { unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0;
-- GitLab From 02143c2931c3c0faf088c5859a10de6c2b4f2d96 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:40 -0700 Subject: [PATCH 1363/1861] x86/hyperv: Make hv_vcpu_is_preempted() visible This function is referenced from assembler, so it needs to be marked visible for LTO.
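The general rule behind this and the next patch: a symbol whose only reference is inside an asm() statement is invisible to LTO's reachability analysis, so it must be marked __visible (and toplevel asm must be placed in .text explicitly). A sketch with a hypothetical symbol:

#include <linux/compiler.h>
#include <linux/types.h>

/* called only from inline asm, so LTO sees no C caller */
__visible bool my_asm_hook(int cpu)
{
    return false;
}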
Fixes: 3a025de64bf8 ("x86/hyperv: Enable PV qspinlock for Hyper-V") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Yi Sun Cc: kys@microsoft.com Cc: haiyangz@microsoft.com Link: https://lkml.kernel.org/r/20190330004743.29541-6-andi@firstfloor.org --- arch/x86/hyperv/hv_spinlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c index a861b0456b1a7..07f21a06392fb 100644 --- a/arch/x86/hyperv/hv_spinlock.c +++ b/arch/x86/hyperv/hv_spinlock.c @@ -56,7 +56,7 @@ static void hv_qlock_wait(u8 *byte, u8 val) /* * Hyper-V does not support this so far. */ -bool hv_vcpu_is_preempted(int vcpu) +__visible bool hv_vcpu_is_preempted(int vcpu) { return false; } -- GitLab From 14e581c381b942ce5463a7e61326d8ce1c843be7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:42 -0700 Subject: [PATCH 1364/1861] x86/kvm: Make steal_time visible This per cpu variable is accessed from assembler code, so it needs to be visible for LTO. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20190330004743.29541-8-andi@firstfloor.org --- arch/x86/kernel/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5c93a65ee1e5c..3f0cc828cc364 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -67,7 +67,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); -static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); +DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; /* -- GitLab From bd4264112f93045704731850c5e4d85db981cd85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2019 11:49:17 +0200 Subject: [PATCH 1365/1861] drm/ttm: fix re-init of global structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver unloads without unloading TTM we don't correctly clear the global structures leading to errors on re-init. Next step should probably be to remove the global structures and kobjs all together, but this is tricky since we need to maintain backward compatibility. 
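The pattern this fix converges on is a refcounted global that is wiped back to zero state on last release, so the next first-user re-initializes from scratch. Schematically, with hypothetical names mirroring the diff that follows:

#include <linux/mutex.h>
#include <linux/string.h>

struct my_global { int state; };          /* assumed type */
int my_global_setup(struct my_global *g); /* assumed init helper */

static DEFINE_MUTEX(glob_mutex);
static unsigned int glob_use_count;
static struct my_global glob;

int glob_init(void)
{
    int ret = 0;

    mutex_lock(&glob_mutex);
    if (++glob_use_count == 1)
        ret = my_global_setup(&glob);
    mutex_unlock(&glob_mutex);
    return ret;
}

void glob_release(void)
{
    mutex_lock(&glob_mutex);
    if (--glob_use_count == 0)
        memset(&glob, 0, sizeof(glob)); /* pristine for re-init */
    mutex_unlock(&glob_mutex);
}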
Signed-off-by: Christian König Reviewed-by: Karol Herbst Tested-by: Karol Herbst CC: stable@vger.kernel.org # 5.0.x Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 10 +++++----- drivers/gpu/drm/ttm/ttm_memory.c | 5 +++-- include/drm/ttm/ttm_bo_driver.h | 1 - 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0fa5034b9f9e0..1a01669b159ab 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj); * ttm_global_mutex - protecting the global BO state */ DEFINE_MUTEX(ttm_global_mutex); -struct ttm_bo_global ttm_bo_glob = { - .use_count = 0 -}; +unsigned ttm_bo_glob_use_count; +struct ttm_bo_global ttm_bo_glob; static struct attribute ttm_bo_count = { .name = "bo_count", @@ -1531,12 +1530,13 @@ static void ttm_bo_global_release(void) struct ttm_bo_global *glob = &ttm_bo_glob; mutex_lock(&ttm_global_mutex); - if (--glob->use_count > 0) + if (--ttm_bo_glob_use_count > 0) goto out; kobject_del(&glob->kobj); kobject_put(&glob->kobj); ttm_mem_global_release(&ttm_mem_glob); + memset(glob, 0, sizeof(*glob)); out: mutex_unlock(&ttm_global_mutex); } @@ -1548,7 +1548,7 @@ static int ttm_bo_global_init(void) unsigned i; mutex_lock(&ttm_global_mutex); - if (++glob->use_count > 1) + if (++ttm_bo_glob_use_count > 1) goto out; ret = ttm_mem_global_init(&ttm_mem_glob); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f1567c353b543..9a0909decb366 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -461,8 +461,8 @@ out_no_zone: void ttm_mem_global_release(struct ttm_mem_global *glob) { - unsigned int i; struct ttm_mem_zone *zone; + unsigned int i; /* let the page allocator first stop the shrink work. */ ttm_page_alloc_fini(); @@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) zone = glob->zones[i]; kobject_del(&zone->kobj); kobject_put(&zone->kobj); - } + } kobject_del(&glob->kobj); kobject_put(&glob->kobj); + memset(glob, 0, sizeof(*glob)); } static void ttm_check_swapping(struct ttm_mem_global *glob) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index cbf3180cb612e..668ad971cd7b2 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -420,7 +420,6 @@ extern struct ttm_bo_global { /** * Protected by ttm_global_mutex. */ - unsigned int use_count; struct list_head device_list; /**
-- GitLab From 1a62b18d51e5c5ecc0345c85bb9fef870ab721ed Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:49:55 -0700 Subject: [PATCH 1366/1861] slab: store tagged freelist for off-slab slabmgmt Commit 51dedad06b5f ("kasan, slab: make freelist stored without tags") calls kasan_reset_tag() for off-slab slab management object leading to freelist being stored non-tagged. However, cache_grow_begin() calls alloc_slabmgmt(), which calls kmem_cache_alloc_node(), which assigns a tag for the address and stores it in the shadow address. As a result, it causes endless errors below during boot due to drain_freelist() -> slab_destroy() -> kasan_slab_free() which compares the already untagged freelist against the stored tag in the shadow address. Since off-slab slab management object freelist is such a special case, just store it tagged. Non-off-slab management object freelist is still stored untagged; it has not been assigned a tag and should not cause any other trouble with this inconsistency.
BUG: KASAN: double-free or invalid-free in slab_destroy+0x84/0x88 Pointer tag: [ff], memory tag: [99] CPU: 0 PID: 1376 Comm: kworker/0:4 Tainted: G W 5.1.0-rc3+ #8 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.0.6 07/10/2018 Workqueue: cgroup_destroy css_killed_work_fn Call trace: print_address_description+0x74/0x2a4 kasan_report_invalid_free+0x80/0xc0 __kasan_slab_free+0x204/0x208 kasan_slab_free+0xc/0x18 kmem_cache_free+0xe4/0x254 slab_destroy+0x84/0x88 drain_freelist+0xd0/0x104 __kmem_cache_shrink+0x1ac/0x224 __kmemcg_cache_deactivate+0x1c/0x28 memcg_deactivate_kmem_caches+0xa0/0xe8 memcg_offline_kmem+0x8c/0x3d4 mem_cgroup_css_offline+0x24c/0x290 css_killed_work_fn+0x154/0x618 process_one_work+0x9cc/0x183c worker_thread+0x9b0/0xe38 kthread+0x374/0x390 ret_from_fork+0x10/0x18 Allocated by task 1625: __kasan_kmalloc+0x168/0x240 kasan_slab_alloc+0x18/0x20 kmem_cache_alloc_node+0x1f8/0x3a0 cache_grow_begin+0x4fc/0xa24 cache_alloc_refill+0x2f8/0x3e8 kmem_cache_alloc+0x1bc/0x3bc sock_alloc_inode+0x58/0x334 alloc_inode+0xb8/0x164 new_inode_pseudo+0x20/0xec sock_alloc+0x74/0x284 __sock_create+0xb0/0x58c sock_create+0x98/0xb8 __sys_socket+0x60/0x138 __arm64_sys_socket+0xa4/0x110 el0_svc_handler+0x2c0/0x47c el0_svc+0x8/0xc Freed by task 1625: __kasan_slab_free+0x114/0x208 kasan_slab_free+0xc/0x18 kfree+0x1a8/0x1e0 single_release+0x7c/0x9c close_pdeo+0x13c/0x43c proc_reg_release+0xec/0x108 __fput+0x2f8/0x784 ____fput+0x1c/0x28 task_work_run+0xc0/0x1b0 do_notify_resume+0xb44/0x1278 work_pending+0x8/0x10 The buggy address belongs to the object at ffff809681b89e00 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffff809681b89e00, ffff809681b89e80) The buggy address belongs to the page: page:ffff7fe025a06e00 count:1 mapcount:0 mapping:01ff80082000fb00 index:0xffff809681b8fe04 flags: 0x17ffffffc000200(slab) raw: 017ffffffc000200 ffff7fe025a06d08 ffff7fe022ef7b88 01ff80082000fb00 raw: ffff809681b8fe04 ffff809681b80000 00000001000000e0 0000000000000000 page dumped because: kasan: bad access detected page allocated via order 0, migratetype Unmovable, gfp_mask 0x2420c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_COMP|__GFP_THISNODE) prep_new_page+0x4e0/0x5e0 get_page_from_freelist+0x4ce8/0x50d4 __alloc_pages_nodemask+0x738/0x38b8 cache_grow_begin+0xd8/0xa24 ____cache_alloc_node+0x14c/0x268 __kmalloc+0x1c8/0x3fc ftrace_free_mem+0x408/0x1284 ftrace_free_init_mem+0x20/0x28 kernel_init+0x24/0x548 ret_from_fork+0x10/0x18 Memory state around the buggy address: ffff809681b89c00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ffff809681b89d00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe >ffff809681b89e00: 99 99 99 99 99 99 99 99 fe fe fe fe fe fe fe fe ^ ffff809681b89f00: 43 43 43 43 43 fe fe fe fe fe fe fe fe fe fe fe ffff809681b8a000: 6d fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe Link: http://lkml.kernel.org/r/20190403022858.97584-1-cai@lca.pw Fixes: 51dedad06b5f ("kasan, slab: make freelist stored without tags") Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 47a380a486eef..9142ee9924932 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2374,7 +2374,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. 
*/ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { -- GitLab From 87039546544479d4bedb19d0ea525270c43c1c9b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:49:58 -0700 Subject: [PATCH 1367/1861] mm: swapoff: shmem_find_swap_entries() filter out other types Swapfile "type" was passed all the way down to shmem_unuse_inode(), but then forgotten from shmem_find_swap_entries(): with the result that removing one swapfile would try to free up all the swap from shmem - no problem when only one swapfile anyway, but counter-productive when more, causing swapoff to be unnecessarily OOM-killed when it should succeed. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081254470.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: Konstantin Khlebnikov Cc: "Alex Xu (Hello71)" Cc: Vineeth Pillai Cc: Kelley Nielsen Cc: Rik van Riel Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b3db3779a30a1..859e8628071f1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1099,10 +1099,11 @@ extern struct swap_info_struct *swap_info[]; static int shmem_find_swap_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices, - bool frontswap) + unsigned int type, bool frontswap) { XA_STATE(xas, &mapping->i_pages, start); struct page *page; + swp_entry_t entry; unsigned int ret = 0; if (!nr_entries) @@ -1116,13 +1117,12 @@ static int shmem_find_swap_entries(struct address_space *mapping, if (!xa_is_value(page)) continue; - if (frontswap) { - swp_entry_t entry = radix_to_swp_entry(page); - - if (!frontswap_test(swap_info[swp_type(entry)], - swp_offset(entry))) - continue; - } + entry = radix_to_swp_entry(page); + if (swp_type(entry) != type) + continue; + if (frontswap && + !frontswap_test(swap_info[type], swp_offset(entry))) + continue; indices[ret] = xas.xa_index; entries[ret] = page; @@ -1194,7 +1194,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type, pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, pvec.pages, indices, - frontswap); + type, frontswap); if (pvec.nr == 0) { ret = 0; break; -- GitLab From dd862deb151aad2548e72b077a82ad3aa91b715f Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:50:02 -0700 Subject: [PATCH 1368/1861] mm: swapoff: remove too limiting SWAP_UNUSE_MAX_TRIES SWAP_UNUSE_MAX_TRIES 3 appeared to work well in earlier testing, but further testing has proved it to be a source of unnecessary swapoff EBUSY failures (which can then be followed by unmount EBUSY failures). When mmget_not_zero() or shmem's igrab() fails, there is an mm exiting or inode being evicted, freeing up swap independent of try_to_unuse(). Those typically completed much sooner than the old quadratic swapoff, but now it's more common that swapoff may need to wait for them. It's possible to move those cases from init_mm.mmlist and shmem_swaplist to separate "exiting" swaplists, and try_to_unuse() then wait for those lists to be emptied; but we've not bothered with that in the past, and don't want to risk missing some other forgotten case. So just revert to cycling around until the swap is gone, without any retries limit. 
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081256170.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: "Alex Xu (Hello71)" Cc: Huang Ying Cc: Kelley Nielsen Cc: Konstantin Khlebnikov Cc: Rik van Riel Cc: Vineeth Pillai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2b8d9c3fbb47f..bf4ef2e40f23d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2023,7 +2023,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * If the boolean frontswap is true, only unuse pages_to_unuse pages; * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -#define SWAP_UNUSE_MAX_TRIES 3 int try_to_unuse(unsigned int type, bool frontswap, unsigned long pages_to_unuse) { @@ -2035,7 +2034,6 @@ int try_to_unuse(unsigned int type, bool frontswap, struct page *page; swp_entry_t entry; unsigned int i; - int retries = 0; if (!si->inuse_pages) return 0; @@ -2117,14 +2115,16 @@ retry: * If yes, we would need to do retry the unuse logic again. * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. - * Its not worth continuosuly retrying to unuse the swap in this case. - * So we try SWAP_UNUSE_MAX_TRIES times. + * + * Limit the number of retries? No: when shmem_unuse()'s igrab() fails, + * a shmem inode using swap is being evicted; and when mmget_not_zero() + * above fails, that mm is likely to be freeing swap from exit_mmap(). + * Both proceed at their own independent pace: we could move them to + * separate lists, and wait for those lists to be emptied; but it's + * easier and more robust (though cpu-intensive) just to keep retrying. */ - if (++retries >= SWAP_UNUSE_MAX_TRIES) - retval = -EBUSY; - else if (si->inuse_pages) + if (si->inuse_pages) goto retry; - out: return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval; } -- GitLab From 64165b1affc5bc16231ac971e66aae7d68d57f2c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:50:09 -0700 Subject: [PATCH 1369/1861] mm: swapoff: take notice of completion sooner The old try_to_unuse() implementation was driven by find_next_to_unuse(), which terminated as soon as all the swap had been freed. Add inuse_pages checks now (alongside signal_pending()) to stop scanning mms and swap_map once finished. The same ought to be done in shmem_unuse() too, but never was before, and needs a different interface: so leave it as is for now. 
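Together with the previous patch, the scanning loops in try_to_unuse() converge on one shape: run while swap remains and no signal is pending, then retry or give up. A condensed schematic of that shape (not compilable standalone; unuse_entry() is a placeholder for the page lookup and unuse work):

/* condensed schematic of try_to_unuse() after both patches */
static int drain_swap(struct swap_info_struct *si, unsigned int type,
                      bool frontswap)
{
    unsigned int i = 0;
    int retval = 0;

retry:
    while (si->inuse_pages && !signal_pending(current) &&
           (i = find_next_to_unuse(si, i, frontswap)) != 0)
        unuse_entry(swp_entry(type, i)); /* placeholder worker */

    if (si->inuse_pages) {
        if (!signal_pending(current))
            goto retry;
        retval = -EINTR; /* interrupted, not EBUSY */
    }
    return retval;
}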
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081258200.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: "Alex Xu (Hello71)" Cc: Huang Ying Cc: Kelley Nielsen Cc: Konstantin Khlebnikov Cc: Rik van Riel Cc: Vineeth Pillai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index bf4ef2e40f23d..71383625a582c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2051,11 +2051,9 @@ retry: spin_lock(&mmlist_lock); p = &init_mm.mmlist; - while ((p = p->next) != &init_mm.mmlist) { - if (signal_pending(current)) { - retval = -EINTR; - break; - } + while (si->inuse_pages && + !signal_pending(current) && + (p = p->next) != &init_mm.mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!mmget_not_zero(mm)) @@ -2082,7 +2080,9 @@ retry: mmput(prev_mm); i = 0; - while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { + while (si->inuse_pages && + !signal_pending(current) && + (i = find_next_to_unuse(si, i, frontswap)) != 0) { entry = swp_entry(type, i); page = find_get_page(swap_address_space(entry), i); @@ -2123,8 +2123,11 @@ retry: * separate lists, and wait for those lists to be emptied; but it's * easier and more robust (though cpu-intensive) just to keep retrying. */ - if (si->inuse_pages) - goto retry; + if (si->inuse_pages) { + if (!signal_pending(current)) + goto retry; + retval = -EINTR; + } out: return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval; } -- GitLab From af53d3e9e04024885de5b4fda51e5fa362ae2bd8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 18 Apr 2019 17:50:13 -0700 Subject: [PATCH 1370/1861] mm: swapoff: shmem_unuse() stop eviction without igrab() The igrab() in shmem_unuse() looks good, but we forgot that it gives no protection against concurrent unmounting: a point made by Konstantin Khlebnikov eight years ago, and then fixed in 2.6.39 by 778dd893ae78 ("tmpfs: fix race between umount and swapoff"). The current 5.1-rc swapoff is liable to hit "VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds. Have a nice day..." followed by GPF. Once again, give up on using igrab(); but don't go back to making such heavy-handed use of shmem_swaplist_mutex as last time: that would spoil the new design, and I expect could deadlock inside shmem_swapin_page(). Instead, shmem_unuse() just raise a "stop_eviction" count in the shmem- specific inode, and shmem_evict_inode() wait for that to go down to 0. Call it "stop_eviction" rather than "swapoff_busy" because it can be put to use for others later (huge tmpfs patches expect to use it). That simplifies shmem_unuse(), protecting it from both unlink and unmount; and in practice lets it locate all the swap in its first try. But do not rely on that: there's still a theoretical case, when shmem_writepage() might have been preempted after its get_swap_page(), before making the swap entry visible to swapoff. 
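In isolation, the pin-count handshake that replaces igrab() pairs an atomic counter with wait_var_event()/wake_up_var(). A sketch in which struct obj and list_mutex stand in for the shmem inode and swaplist lock:

#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/mutex.h>

struct obj { atomic_t stop_eviction; };

static DEFINE_MUTEX(list_mutex);
void scan(struct obj *o); /* placeholder for shmem_unuse_inode() */

static void scanner_side(struct obj *o)
{
    atomic_inc(&o->stop_eviction);  /* pin: evictor must wait */
    mutex_unlock(&list_mutex);
    scan(o);
    mutex_lock(&list_mutex);
    if (atomic_dec_and_test(&o->stop_eviction))
        wake_up_var(&o->stop_eviction);
}

static void evictor_side(struct obj *o)
{
    wait_var_event(&o->stop_eviction,
                   !atomic_read(&o->stop_eviction));
    /* now safe to tear the object down */
}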
[hughd@google.com: remove incorrect list_del()] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904091133570.1898@eggly.anvils Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081259400.1523@eggly.anvils Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity") Signed-off-by: Hugh Dickins Cc: "Alex Xu (Hello71)" Cc: Huang Ying Cc: Kelley Nielsen Cc: Konstantin Khlebnikov Cc: Rik van Riel Cc: Vineeth Pillai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 1 + mm/shmem.c | 40 ++++++++++++++++++---------------------- mm/swapfile.c | 11 +++++------ 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f3fb1edb3526d..20d815a331454 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -21,6 +21,7 @@ struct shmem_inode_info { struct list_head swaplist; /* chain of maybes on swap */ struct shared_policy policy; /* NUMA memory alloc policy */ struct simple_xattrs xattrs; /* list of xattrs */ + atomic_t stop_eviction; /* hold when working on inode */ struct inode vfs_inode; }; diff --git a/mm/shmem.c b/mm/shmem.c index 859e8628071f1..2275a0ff7c305 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode) } spin_unlock(&sbinfo->shrinklist_lock); } - if (!list_empty(&info->swaplist)) { + while (!list_empty(&info->swaplist)) { + /* Wait while shmem_unuse() is scanning this inode... */ + wait_var_event(&info->stop_eviction, + !atomic_read(&info->stop_eviction)); mutex_lock(&shmem_swaplist_mutex); - list_del_init(&info->swaplist); + /* ...but beware of the race if we peeked too early */ + if (!atomic_read(&info->stop_eviction)) + list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } } @@ -1227,36 +1232,27 @@ int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse) { struct shmem_inode_info *info, *next; - struct inode *inode; - struct inode *prev_inode = NULL; int error = 0; if (list_empty(&shmem_swaplist)) return 0; mutex_lock(&shmem_swaplist_mutex); - - /* - * The extra refcount on the inode is necessary to safely dereference - * p->next after re-acquiring the lock. New shmem inodes with swap - * get added to the end of the list and we will scan them all. - */ list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) { if (!info->swapped) { list_del_init(&info->swaplist); continue; } - - inode = igrab(&info->vfs_inode); - if (!inode) - continue; - + /* + * Drop the swaplist mutex while searching the inode for swap; + * but before doing so, make sure shmem_evict_inode() will not + * remove placeholder inode from swaplist, nor let it be freed + * (igrab() would protect from unlink, but not from unmount). 
+ */ + atomic_inc(&info->stop_eviction); mutex_unlock(&shmem_swaplist_mutex); - if (prev_inode) - iput(prev_inode); - prev_inode = inode; - error = shmem_unuse_inode(inode, type, frontswap, + error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, fs_pages_to_unuse); cond_resched(); @@ -1264,14 +1260,13 @@ int shmem_unuse(unsigned int type, bool frontswap, next = list_next_entry(info, swaplist); if (!info->swapped) list_del_init(&info->swaplist); + if (atomic_dec_and_test(&info->stop_eviction)) + wake_up_var(&info->stop_eviction); if (error) break; } mutex_unlock(&shmem_swaplist_mutex); - if (prev_inode) - iput(prev_inode); - return error; } @@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + atomic_set(&info->stop_eviction, 0); info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->shrinklist); diff --git a/mm/swapfile.c b/mm/swapfile.c index 71383625a582c..cf63b5f01adf7 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2116,12 +2116,11 @@ retry: * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. * - * Limit the number of retries? No: when shmem_unuse()'s igrab() fails, - * a shmem inode using swap is being evicted; and when mmget_not_zero() - * above fails, that mm is likely to be freeing swap from exit_mmap(). - * Both proceed at their own independent pace: we could move them to - * separate lists, and wait for those lists to be emptied; but it's - * easier and more robust (though cpu-intensive) just to keep retrying. + * Limit the number of retries? No: when mmget_not_zero() above fails, + * that mm is likely to be freeing swap from exit_mmap(), which proceeds + * at its own independent pace; and even shmem_writepage() could have + * been preempted after get_swap_page(), temporarily hiding that swap. + * It's easy and robust (though cpu-intensive) just to keep retrying. */ if (si->inuse_pages) { if (!signal_pending(current)) -- GitLab From 37803841c92d7b327147e0b1be3436423189e1cf Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Thu, 18 Apr 2019 17:50:16 -0700 Subject: [PATCH 1371/1861] mm/memory_hotplug: do not unlock after failing to take the device_hotplug_lock When adding memory by probing a memory block in the sysfs interface, there is an obvious issue where we will unlock the device_hotplug_lock when we failed to takes it. That issue was introduced in 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock"). We should drop out in time when failing to take the device_hotplug_lock. 
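The bug is the classic unlock-without-lock error path; the fixed shape is simply to return before anything needs undoing. A schematic body, with do_probe() as a placeholder for the real work of probe_store():

    ret = lock_device_hotplug_sysfs();
    if (ret)
        return ret;         /* lock not taken: nothing to undo */

    ret = do_probe(phys_addr);  /* placeholder for the probe work */

    unlock_device_hotplug();
    return ret;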
Link: http://lkml.kernel.org/r/1554696437-9593-1-git-send-email-zhongjiang@huawei.com Fixes: 8df1d0e4a265 ("mm/memory_hotplug: make add_memory() take the device_hotplug_lock") Signed-off-by: zhong jiang Reported-by: Yang yingliang Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index cb8347500ce28..e49028a604295 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -506,7 +506,7 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr, ret = lock_device_hotplug_sysfs(); if (ret) - goto out; + return ret; nid = memory_add_physaddr_to_nid(phys_addr); ret = __add_memory(nid, phys_addr, -- GitLab From e8277b3b52240ec1caad8e6df278863e4bf42eac Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 18 Apr 2019 17:50:20 -0700 Subject: [PATCH 1372/1861] mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n Commit 58bc4c34d249 ("mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly") depends on skipping vmstat entries with empty name introduced in 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") but reverted in b29940c1abd7 ("mm: rename and change semantics of nr_indirectly_reclaimable_bytes"). So skipping no longer works and /proc/vmstat has misformatted lines " 0". This patch simply shows debug counters "nr_tlb_remote_*" for UP. Link: http://lkml.kernel.org/r/155481488468.467.4295519102880913454.stgit@buzz Fixes: 58bc4c34d249 ("mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly") Signed-off-by: Konstantin Khlebnikov Acked-by: Vlastimil Babka Cc: Roman Gushchin Cc: Jann Horn Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 36b56f858f0f0..a7d493366a65b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1274,13 +1274,8 @@ const char * const vmstat_text[] = { #endif #endif /* CONFIG_MEMORY_BALLOON */ #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#else - "", /* nr_tlb_remote_flush */ - "", /* nr_tlb_remote_flush_received */ -#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ -- GitLab From 8cd40d1d41ffc305d9aed77937896e1712b2490c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Apr 2019 17:50:23 -0700 Subject: [PATCH 1373/1861] proc: fix map_files test on F29 F29 bans mapping first 64KB even for root making test fail. Iterate from address 0 until mmap() works. 
Gentoo (root): openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3 mmap(NULL, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0 Gentoo (non-root): openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3 mmap(NULL, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EPERM (Operation not permitted) mmap(0x1000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x1000 F29 (root): openat(AT_FDCWD, "/dev/zero", O_RDONLY) = 3 mmap(NULL, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x1000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x2000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x3000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x4000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x5000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x6000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x7000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x8000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x9000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xa000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xb000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xc000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xd000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xe000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0xf000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = -1 EACCES (Permission denied) mmap(0x10000, 4096, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x10000 Now all proc tests succeed on F29 if run as root, at last! 
Link: http://lkml.kernel.org/r/20190414123612.GB12971@avx2 Signed-off-by: Alexey Dobriyan Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../selftests/proc/proc-self-map-files-002.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 762cb01f2ca71..47b7473dedef7 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -46,12 +46,9 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { - const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); -#ifdef __arm__ - unsigned long va = 2 * PAGE_SIZE; -#else - unsigned long va = 0; -#endif + const int PAGE_SIZE = sysconf(_SC_PAGESIZE); + const unsigned long va_max = 1UL << 32; + unsigned long va; void *p; int fd; unsigned long a, b; @@ -60,10 +57,13 @@ int main(void) if (fd == -1) return 1; - p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); - if (p == MAP_FAILED) { - if (errno == EPERM) - return 4; + for (va = 0; va < va_max; va += PAGE_SIZE) { + p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + if (p == (void *)va) + break; + } + if (va == va_max) { + fprintf(stderr, "error: mmap doesn't like you\n"); return 1; }
-- GitLab From 68545aa1cda847c4fdda7e49331807f99f799838 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Apr 2019 17:50:27 -0700 Subject: [PATCH 1374/1861] proc: fixup proc-pid-vm test Silly sizeof(pointer) vs sizeof(uint8_t[]) bug. Link: http://lkml.kernel.org/r/20190414123009.GA12971@avx2 Fixes: e483b0208784 ("proc: test /proc/*/maps, smaps, smaps_rollup, statm") Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/proc/proc-pid-vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 7202bbac976ea..853aa164a401e 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -187,8 +187,8 @@ static int make_exe(const uint8_t *payload, size_t len) ph.p_offset = 0; ph.p_vaddr = VADDR; ph.p_paddr = 0; - ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); - ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload); + ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; + ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; ph.p_align = 4096; fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
-- GitLab From 1a9f219157b22d0ffb340a9c5f431afd02cd2cf3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:50:30 -0700 Subject: [PATCH 1375/1861] mm/hotplug: treat CMA pages as unmovable has_unmovable_pages() is used by allocating CMA and gigantic pages as well as the memory hotplug. The latter doesn't know how to offline CMA pool properly now, but if an unused (free) CMA page is encountered, then has_unmovable_pages() happily considers it free memory and propagates this up the call chain. Memory offlining code then frees the page without a proper CMA teardown, which leads to accounting issues. Moreover, if the same memory range is onlined again then the memory never gets back to the CMA pool.
State after memory offline: # grep cma /proc/vmstat nr_free_cma 205824 # cat /sys/kernel/debug/cma/cma-kvm_cma/count 209920 Also, kmemleak still thinks those memory addresses are reserved below, but they have already been used by the buddy allocator after onlining. This patch fixes the situation by treating CMA pageblocks as unmovable except when has_unmovable_pages() is called as part of CMA allocation. Offlined Pages 4096 kmemleak: Cannot insert 0xc000201f7d040008 into the object search tree (overlaps existing) Call Trace: dump_stack+0xb0/0xf4 (unreliable) create_object+0x344/0x380 __kmalloc_node+0x3ec/0x860 kvmalloc_node+0x58/0x110 seq_read+0x41c/0x620 __vfs_read+0x3c/0x70 vfs_read+0xbc/0x1a0 ksys_read+0x7c/0x140 system_call+0x5c/0x70 kmemleak: Kernel memory leak detector disabled kmemleak: Object 0xc000201cc8000000 (size 13757317120): kmemleak: comm "swapper/0", pid 0, jiffies 4294937297 kmemleak: min_count = -1 kmemleak: count = 0 kmemleak: flags = 0x5 kmemleak: checksum = 0 kmemleak: backtrace: cma_declare_contiguous+0x2a4/0x3b0 kvm_cma_reserve+0x11c/0x134 setup_arch+0x300/0x3f8 start_kernel+0x9c/0x6e8 start_here_common+0x1c/0x4b0 kmemleak: Automatic memory scanning thread ended [cai@lca.pw: use is_migrate_cma_page() and update commit log] Link: http://lkml.kernel.org/r/20190416170510.20048-1-cai@lca.pw Link: http://lkml.kernel.org/r/20190413002623.8967-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d96ca5bc555bb..c6ce20aaf80bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8005,7 +8005,10 @@ void *__init alloc_large_system_hash(const char *tablename, bool has_unmovable_pages(struct zone *zone, struct page *page, int count, int migratetype, int flags) { - unsigned long pfn, iter, found; + unsigned long found; + unsigned long iter = 0; + unsigned long pfn = page_to_pfn(page); + const char *reason = "unmovable page"; /* * TODO we could make this much more efficient by not checking every @@ -8015,17 +8018,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, * can still lead to having bootmem allocations in zone_movable. */ - /* - * CMA allocations (alloc_contig_range) really need to mark isolate - * CMA pageblocks even when they are not movable in fact so consider - * them movable here. - */ - if (is_migrate_cma(migratetype) && - is_migrate_cma(get_pageblock_migratetype(page))) - return false; + if (is_migrate_cma_page(page)) { + /* + * CMA allocations (alloc_contig_range) really need to mark + * isolate CMA pageblocks even when they are not movable in fact + * so consider them movable here.
+ */ + if (is_migrate_cma(migratetype)) + return false; + + reason = "CMA page"; + goto unmovable; + } - pfn = page_to_pfn(page); - for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { + for (found = 0; iter < pageblock_nr_pages; iter++) { unsigned long check = pfn + iter; if (!pfn_valid_within(check)) @@ -8105,7 +8111,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, unmovable: WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); if (flags & REPORT_FAILURE) - dump_page(pfn_to_page(pfn+iter), "unmovable page"); + dump_page(pfn_to_page(pfn + iter), reason); return true; } -- GitLab From 3b991208b897f52507168374033771a984b947b1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 18 Apr 2019 17:50:34 -0700 Subject: [PATCH 1376/1861] mm: fix inactive list balancing between NUMA nodes and cgroups During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's thrashing on the node that is about to be reclaimed. But when cgroups are enabled, we suddenly ignore the node scope and use the cgroup scope only. The result is that pressure bleeds between NUMA nodes depending on whether cgroups are merely compiled into Linux. This behavioral difference is unexpected and undesirable. When the refault adaptivity of the inactive list was first introduced, there were no statistics at the lruvec level - the intersection of node and memcg - so it was better than nothing. But now that we have that infrastructure, use lruvec_page_state() to make the list balancing decision always NUMA aware. [hannes@cmpxchg.org: fix bisection hole] Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org Fixes: 2a2e48854d70 ("mm: vmscan: fix IO/refault regression in cache workingset transition") Signed-off-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index a5ad0b35ab8e3..a815f73ee4d5b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2176,7 +2176,6 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct mem_cgroup *memcg, struct scan_control *sc, bool actual_reclaim) { enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; @@ -2197,16 +2196,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - /* * When refaults are being observed, it means a new workingset * is being established. Disable active list protection to get * rid of the stale workingset quickly. 
*/ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); if (file && actual_reclaim && lruvec->refaults != refaults) { inactive_ratio = 0; } else { @@ -2227,12 +2222,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct mem_cgroup *memcg, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), - memcg, sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2332,7 +2325,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anonymous pages on the LRU in eligible zones. * Otherwise, the small LRU gets thrashed. */ - if (!inactive_list_is_low(lruvec, false, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, false, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_ANON; @@ -2350,7 +2343,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && + if (!inactive_list_is_low(lruvec, true, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2503,7 +2496,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, memcg, sc); + lruvec, sc); } } @@ -2570,7 +2563,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2969,12 +2962,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) unsigned long refaults; struct lruvec *lruvec; - if (memcg) - refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); - else - refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - lruvec = mem_cgroup_lruvec(pgdat, memcg); + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); lruvec->refaults = refaults; } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); } @@ -3339,7 +3328,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, memcg, sc, true)) + if (inactive_list_is_low(lruvec, false, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); -- GitLab From 40453c4f9bb6d166a56a102a8c51dd24b0801557 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Apr 2019 17:50:37 -0700 Subject: [PATCH 1377/1861] kcov: improve CONFIG_ARCH_HAS_KCOV help text The help text for CONFIG_ARCH_HAS_KCOV is stale, and describes the feature as being enabled only for x86_64, when it is now enabled for several architectures, including arm, arm64, powerpc, and s390. Let's remove that stale help text, and update it along the lines of that for ARCH_HAS_FORTIFY_SOURCE, better describing when an architecture should select CONFIG_ARCH_HAS_KCOV. 
Link: http://lkml.kernel.org/r/20190412102733.5154-1-mark.rutland@arm.com Signed-off-by: Mark Rutland Acked-by: Dmitry Vyukov Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e373..00dbcdbc9a0d3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -753,9 +753,9 @@ endmenu # "Memory Debugging" config ARCH_HAS_KCOV bool help - KCOV does not have any arch-specific code, but currently it is enabled - only for x86_64. KCOV requires testing on other archs, and most likely - disabling of instrumentation for some early boot code. + An architecture should select this when it can successfully + build and run with CONFIG_KCOV. This typically requires + disabling instrumentation for some early boot code. config CC_HAS_SANCOV_TRACE_PC def_bool $(cc-option,-fsanitize-coverage=trace-pc) -- GitLab From 8f4a8c12cafe54535f334a09be7ec7f236134764 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 18 Apr 2019 17:50:41 -0700 Subject: [PATCH 1378/1861] kernel/watchdog_hld.c: hard lockup message should end with a newline Separate print_modules() and hard lockup error message. Before the patch: NMI watchdog: Watchdog detected hard LOCKUP on cpu 1Modules linked in: nls_cp437 Link: http://lkml.kernel.org/r/20190412062557.2700-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog_hld.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 71381168dedef..247bf0b1582ca 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; - pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", + this_cpu); print_modules(); print_irqtrace_events(current); if (regs) -- GitLab From 6041186a32585fc7a1d0f6cfe2f138b05fdc3c82 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Apr 2019 17:50:44 -0700 Subject: [PATCH 1379/1861] init: initialize jump labels before command line option parsing When a module option, or core kernel argument, toggles a static-key it requires jump labels to be initialized early. While x86, PowerPC, and ARM64 arrange for jump_label_init() to be called before parse_args(), ARM does not. 
Kernel command line: rdinit=/sbin/init page_alloc.shuffle=1 panic=-1 console=ttyAMA0,115200 page_alloc.shuffle=1 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at ./include/linux/jump_label.h:303 page_alloc_shuffle+0x12c/0x1ac static_key_enable(): static key 'page_alloc_shuffle_key+0x0/0x4' used before call to jump_label_init() Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc4-next-20190410-00003-g3367c36ce744 #1 Hardware name: ARM Integrator/CP (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x18) [] (show_stack) from [] (dump_stack+0x18/0x24) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (page_alloc_shuffle+0x12c/0x1ac) [] (page_alloc_shuffle) from [] (shuffle_store+0x28/0x48) [] (shuffle_store) from [] (parse_args+0x1f4/0x350) [] (parse_args) from [] (start_kernel+0x1c0/0x488) Move the fallback call to jump_label_init() to occur before parse_args(). The redundant calls to jump_label_init() in other archs are left intact in case they have static key toggling use cases that are even earlier than option parsing. Link: http://lkml.kernel.org/r/155544804466.1032396.13418949511615676665.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Guenter Roeck Reviewed-by: Kees Cook Cc: Mathieu Desnoyers Cc: Thomas Gleixner Cc: Mike Rapoport Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 598e278b46f74..7d4025d665eb9 100644 --- a/init/main.c +++ b/init/main.c @@ -582,6 +582,8 @@ asmlinkage __visible void __init start_kernel(void) page_alloc_init(); pr_notice("Kernel command line: %s\n", boot_command_line); + /* parameters may set static keys */ + jump_label_init(); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, @@ -591,8 +593,6 @@ asmlinkage __visible void __init start_kernel(void) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg); - jump_label_init(); - /* * These use large bootmem allocations and must precede * kmem_cache_init() -- GitLab From dce5b0bdeec61bdbee56121ceb1d014151d5cab1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Apr 2019 17:50:48 -0700 Subject: [PATCH 1380/1861] mm/kmemleak.c: fix unused-function warning The only references outside of the #ifdef have been removed, so now we get a warning in non-SMP configurations: mm/kmemleak.c:1404:13: error: unused function 'scan_large_block' [-Werror,-Wunused-function] Add a new #ifdef around it. Link: http://lkml.kernel.org/r/20190416123148.3502045-1-arnd@arndb.de Fixes: 298a32b13208 ("kmemleak: powerpc: skip scanning holes in the .bss section") Signed-off-by: Arnd Bergmann Acked-by: Catalin Marinas Cc: Vincent Whitchurch Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 6c318f5ac234f..2e435b8142e51 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1401,6 +1401,7 @@ static void scan_block(void *_start, void *_end, /* * Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency. 
*/ +#ifdef CONFIG_SMP static void scan_large_block(void *start, void *end) { void *next; @@ -1412,6 +1413,7 @@ static void scan_large_block(void *start, void *end) cond_resched(); } } +#endif /* * Scan a memory block corresponding to a kmemleak_object. A condition is -- GitLab From 04f5866e41fb70690e28397487d8bd8eea7d712a Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 18 Apr 2019 17:50:52 -0700 Subject: [PATCH 1381/1861] coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping The core dumping code has always run without holding the mmap_sem for writing, despite that being the only way to ensure that the entire vma layout will not change from under it. Only using some signal serialization on the processes belonging to the mm is not nearly enough. This was pointed out earlier. For example in Hugh's post from Jul 2017: https://lkml.kernel.org/r/alpine.LSU.2.11.1707191716030.2055@eggly.anvils "Not strictly relevant here, but a related note: I was very surprised to discover, only quite recently, how handle_mm_fault() may be called without down_read(mmap_sem) - when core dumping. That seems a misguided optimization to me, which would also be nice to correct" In particular, because growsdown and growsup can move the vm_start/vm_end, the various loops the core dump does around the vma will not be consistent if page faults can happen concurrently. Pretty much all users calling mmget_not_zero()/get_task_mm() and then taking the mmap_sem had the potential to introduce unexpected side effects in the core dumping code. Adding mmap_sem for writing around the ->core_dump invocation is a viable long term fix, but it requires removing all copy user and page faults and replacing them with get_dump_page() for all binary formats, which is not suitable as a short term fix. For the time being this solution manually covers the places that can confuse the core dump either by altering the vma layout or the vma flags while it runs. Once ->core_dump runs under mmap_sem for writing the function mmget_still_valid() can be dropped. Allowing mmap_sem protected sections to run in parallel with the coredump provides some minor parallelism advantage to the swapoff code (which seems to be safe enough by never mangling any vma field and can keep doing swapins in parallel to the core dumping) and to some other corner case. In order to facilitate the backporting I added "Fixes: 86039bd3b4e6" however the side effect of this same race condition in /proc/pid/mem should be reproducible since before 2.6.12-rc2 so I couldn't add any other "Fixes:" because there's no hash beyond the git genesis commit. Because find_extend_vma() is the only location outside of the process context that could modify the "mm" structures under mmap_sem for reading, by adding the mmget_still_valid() check to it, all other cases that take the mmap_sem for reading don't need the new check after mmget_not_zero()/get_task_mm(). The expand_stack() in page fault context also doesn't need the new check, because all tasks under core dumping are frozen. 
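To make the rule concrete, here is a minimal sketch of the calling pattern the new helper enforces; the task argument and the vma edit are placeholders for illustration, not code from this patch:

	#include <linux/sched/mm.h>
	#include <linux/mm.h>

	/* Hedged sketch: pin an mm, then refuse to touch its vmas if a
	 * coredump is in flight. */
	static int modify_vmas_sketch(struct task_struct *task)
	{
		struct mm_struct *mm = get_task_mm(task);
		int ret = 0;

		if (!mm)
			return -ESRCH;
		down_write(&mm->mmap_sem);
		if (!mmget_still_valid(mm)) {
			ret = -EBUSY;	/* a coredump owns this mm: back off */
			goto out;
		}
		/* ... safe to modify the vma layout or vm_flags here ... */
	out:
		up_write(&mm->mmap_sem);
		mmput(mm);
		return ret;
	}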
Link: http://lkml.kernel.org/r/20190325224949.11068-1-aarcange@redhat.com Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") Signed-off-by: Andrea Arcangeli Reported-by: Jann Horn Suggested-by: Oleg Nesterov Acked-by: Peter Xu Reviewed-by: Mike Rapoport Reviewed-by: Oleg Nesterov Reviewed-by: Jann Horn Acked-by: Jason Gunthorpe Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/infiniband/core/uverbs_main.c | 3 +++ fs/proc/task_mmu.c | 18 ++++++++++++++++++ fs/userfaultfd.c | 9 +++++++++ include/linux/sched/mm.h | 21 +++++++++++++++++++++ mm/mmap.c | 7 ++++++- 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 70b7d80431a9b..f2e7ffe6fc546 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -993,6 +993,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) * will only be one mm, so no big deal. */ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; mutex_lock(&ufile->umap_lock); list_for_each_entry_safe (priv, next_priv, &ufile->umaps, list) { @@ -1007,6 +1009,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); } mutex_unlock(&ufile->umap_lock); + skip_mm: up_write(&mm->mmap_sem); mmput(mm); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 92a91e7816d84..95ca1fe7283cf 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1143,6 +1143,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = -EINTR; goto out_mm; } + /* + * Avoid to modify vma->vm_flags + * without locked ops while the + * coredump reads the vm_flags. + */ + if (!mmget_still_valid(mm)) { + /* + * Silently return "count" + * like if get_task_mm() + * failed. FIXME: should this + * function have returned + * -ESRCH if get_task_mm() + * failed like if + * get_proc_task() fails? + */ + up_write(&mm->mmap_sem); + goto out_mm; + } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 89800fc7dc9d5..f5de1e726356a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); + /* no task can run (and in turn coredump) yet */ + VM_WARN_ON(!mmget_still_valid(mm)); for (vma = mm->mmap; vma; vma = vma->vm_next) if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; @@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. 
*/ down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto skip_mm; prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } +skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: @@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; @@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, goto out; down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) + goto out_unlock; vma = find_vma_prev(mm, start, &prev); if (!vma) goto out_unlock; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 0cd9f10423fb8..a3fda9f024c3c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,27 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +/* + * This has to be called after a get_task_mm()/mmget_not_zero() + * followed by taking the mmap_sem for writing before modifying the + * vmas or anything the coredump pretends not to change from under it. + * + * NOTE: find_extend_vma() called from GUP context is the only place + * that can modify the "mm" (notably the vm_start/end) under mmap_sem + * for reading and outside the context of the process, so it is also + * the only case that holds the mmap_sem for reading that must call + * this function. Generally if the mmap_sem is hold for reading + * there's no need of this check after get_task_mm()/mmget_not_zero(). + * + * This function can be obsoleted and the check can be removed, after + * the coredump code will hold the mmap_sem for writing before + * invoking the ->core_dump methods. + */ +static inline bool mmget_still_valid(struct mm_struct *mm) +{ + return likely(!mm->core_state); +} + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. 
diff --git a/mm/mmap.c b/mm/mmap.c index 41eb48d9b5276..bd7b9f293b391 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -2525,7 +2526,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + /* don't alter vm_end if the coredump is running */ + if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); @@ -2551,6 +2553,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return vma; if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; + /* don't alter vm_start if the coredump is running */ + if (!mmget_still_valid(mm)) + return NULL; start = vma->vm_start; if (expand_stack(vma, addr)) return NULL; -- GitLab From 13e792a19d4e3a1c64e94197ba357685fd584ded Mon Sep 17 00:00:00 2001 From: Laurent Gauthier Date: Sat, 5 Jan 2019 00:07:45 +0100 Subject: [PATCH 1382/1861] tick: Fix typos in comments Signed-off-by: Laurent Gauthier Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 7541cbca695eb..e51778c312f1c 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -809,13 +809,13 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * either the CPU handling the broadcast * interrupt or we got woken by something else. * - * We are not longer in the broadcast mask, so + * We are no longer in the broadcast mask, so * if the cpu local expiry time is already * reached, we would reprogram the cpu local * timer with an already expired event. * * This can lead to a ping-pong when we return - * to idle and therefor rearm the broadcast + * to idle and therefore rearm the broadcast * timer before the cpu local timer was able * to fire. This happens because the forced * reprogramming makes sure that the event -- GitLab From 2ee27796f298b710992a677a7e4d35c8c588b17e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 30 Dec 2018 18:27:15 +0100 Subject: [PATCH 1383/1861] x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority The "ENERGY_PERF_BIAS: Set to 'normal', was 'performance'" message triggers on pretty much every Intel machine. The purpose of log messages with a warning level is to notify the user of something which potentially is a problem, or at least somewhat unexpected. This message clearly does not match those criteria, so lower its log priority from warning to info. 
Signed-off-by: Hans de Goede Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181230172715.17469-1-hdegoede@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fc3c07fe7df58..3142fd7a9b322 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c) if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) return; - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); + pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); } -- GitLab From 28156d7678431312b463361b23a218a1b5645ba5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 28 Dec 2018 07:24:13 +0000 Subject: [PATCH 1384/1861] x86/mce: Fix debugfs_simple_attr.cocci warnings Use DEFINE_DEBUGFS_ATTRIBUTE() rather than DEFINE_SIMPLE_ATTRIBUTE() for debugfs files. Semantic patch information: Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe(). Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci Signed-off-by: YueHaibing Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Vishal Verma Cc: Yazen Ghannam Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/1545981853-70877-1-git-send-email-yuehaibing@huawei.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mce/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 58925e7ccb276..3e081428117c5 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2429,8 +2429,8 @@ static int fake_panic_set(void *data, u64 val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, - fake_panic_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, + "%llu\n"); static int __init mcheck_debugfs_init(void) { @@ -2439,8 +2439,8 @@ static int __init mcheck_debugfs_init(void) dmce = mce_get_debugfs_dir(); if (!dmce) return -ENOMEM; - ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, - &fake_panic_fops); + ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce, + NULL, &fake_panic_fops); if (!ffake_panic) return -ENOMEM; -- GitLab From 987ddbe4870b53623d76ac64044c55a13e368113 Mon Sep 17 00:00:00 2001 From: David Wang Date: Thu, 27 Dec 2018 16:41:50 +0800 Subject: [PATCH 1385/1861] x86/power: Optimize C3 entry on Centaur CPUs For new Centaur CPUs the ucode will take care of the preservation of cache coherence between CPU cores in C-states regardless of how deep the C-states are. So, it is not necessary to flush the caches in software before entering C3. This useless operation will cause a performance drop for the cores which share some caches with the idling core. 
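For context, a simplified sketch of the C3-entry cost this change avoids; it loosely follows drivers/acpi/processor_idle.c and is an illustration under that assumption, not the exact kernel code:

	#include <linux/acpi.h>
	#include <acpi/processor.h>

	/* Hedged sketch of the decision that flags->bm_check feeds into. */
	static void c3_entry_sketch(struct acpi_processor *pr)
	{
		if (!pr->flags.bm_check)
			ACPI_FLUSH_CPU_CACHE();	/* WBINVD: stalls cores sharing the cache */

		/* With bm_check set, as acpi_processor_power_init_bm_check()
		 * now does for these Centaur CPUs, the flush is skipped and
		 * bus-master activity is tracked instead. */
	}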
Signed-off-by: David Wang Reviewed-by: Thomas Gleixner Acked-by: Pavel Machek Cc: Linus Torvalds Cc: Peter Zijlstra Cc: brucechang@via-alliance.com Cc: cooperyan@zhaoxin.com Cc: len.brown@intel.com Cc: linux-pm@kernel.org Cc: qiyuanwang@zhaoxin.com Cc: rjw@rjwysocki.net Cc: timguo@zhaoxin.com Link: http://lkml.kernel.org/r/1545900110-2757-1-git-send-email-davidwang@zhaoxin.com [ Tidy up the comment. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/cstate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 158ad1483c435..cb6e076a6d398 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -51,6 +51,18 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, if (c->x86_vendor == X86_VENDOR_INTEL && (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) flags->bm_control = 0; + /* + * For all recent Centaur CPUs, the ucode will make sure that each + * core can keep cache coherence with each other while entering C3 + * type state. So, set bm_check to 1 to indicate that the kernel + * doesn't need to execute a cache flush operation (WBINVD) when + * entering C3 type state. + */ + if (c->x86_vendor == X86_VENDOR_CENTAUR) { + if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f && + c->x86_stepping >= 0x0e)) + flags->bm_check = 1; + } } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); -- GitLab From f28b11a2abd9cf5535baa03e28fc63ad0e14f52a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Dec 2018 13:36:56 -0800 Subject: [PATCH 1386/1861] x86/fault: Reword initial BUG message for unhandled page faults Reword the NULL pointer dereference case to simply state that a NULL pointer was dereferenced, i.e. drop "unable to handle" as that implies that there are instances where the kernel actually does handle NULL pointer dereferences, which is not true barring funky exception fixup. For the non-NULL case, replace "kernel paging request" with "page fault" as the kernel can technically oops on faults that originated in user code. Dropping "kernel" also allows future patches to provide detailed information on where the fault occurred, e.g. user vs. kernel, without conflicting with the initial BUG message. In both cases, replace "at address=" with wording more appropriate to the oops, as "at" may be interpreted as stating that the address is the RIP of the instruction that faulted. Last, and probably least, further qualify the NULL-pointer path by checking that the fault actually originated in kernel code. It's technically possible for userspace to map address 0, and not printing a super specific message is the least of our worries if the kernel does manage to oops on an actual NULL pointer dereference from userspace. 
Before: BUG: unable to handle kernel NULL pointer dereference at ffffbeef00000000 BUG: unable to handle kernel paging request at ffffbeef00000000 After: BUG: kernel NULL pointer dereference, address = 0000000000000008 BUG: unable to handle page fault for address = ffffbeef00000000 Suggested-by: Linus Torvalds Signed-off-by: Sean Christopherson Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20181221213657.27628-2-sean.j.christopherson@intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 667f1da36208e..df2c5cdef5c4b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -644,9 +644,12 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad from_kuid(&init_user_ns, current_uid())); } - pr_alert("BUG: unable to handle kernel %s at %px\n", - address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", - (void *)address); + if (address < PAGE_SIZE && !user_mode(regs)) + pr_alert("BUG: kernel NULL pointer dereference, address = %px\n", + (void *)address); + else + pr_alert("BUG: unable to handle page fault for address = %px\n", + (void *)address); err_txt[0] = 0; -- GitLab From 18ea35c5ed9921867194a8efc2a0ac2d5a3c7d2a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Dec 2018 13:36:57 -0800 Subject: [PATCH 1387/1861] x86/fault: Decode and print #PF oops in human readable form Linus pointed out that deciphering the raw #PF error code and printing a more human readable message are two different things, and also that printing the negative cases is mostly just noise[1]. For example, the USER bit doesn't mean the fault originated in user code and stating that an oops wasn't due to a protection keys violation isn't interesting since an oops on a keys violation is a one-in-a-million scenario. Remove the per-bit decoding of the error code and instead print: - the raw error code - why the fault occurred - the effective privilege level of the access - the type of access - whether the fault originated in user code or kernel code This provides the user with the information needed to triage 99.9% of oopses without polluting the log with useless information or conflating the error_code with the CPL. 
Sample output: BUG: kernel NULL pointer dereference, address = 0000000000000008 #PF: supervisor-privileged instruction fetch from kernel code #PF: error_code(0x0010) - not-present page BUG: unable to handle page fault for address = ffffbeef00000000 #PF: supervisor-privileged instruction fetch from kernel code #PF: error_code(0x0010) - not-present page BUG: unable to handle page fault for address = ffffc90000230000 #PF: supervisor-privileged write access from kernel code #PF: error_code(0x000b) - reserved bit violation [1] https://lkml.kernel.org/r/CAHk-=whk_fsnxVMvF1T2fFCaP2WrvSybABrLQCWLJyCvHw6NKA@mail.gmail.com Suggested-by: Linus Torvalds Signed-off-by: Sean Christopherson Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20181221213657.27628-3-sean.j.christopherson@intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 42 +++++++++++------------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index df2c5cdef5c4b..74c9204c5751c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -603,24 +603,9 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) name, index, addr, (desc.limit0 | (desc.limit1 << 16))); } -/* - * This helper function transforms the #PF error_code bits into - * "[PROT] [USER]" type of descriptive, almost human-readable error strings: - */ -static void err_str_append(unsigned long error_code, char *buf, unsigned long mask, const char *txt) -{ - if (error_code & mask) { - if (buf[0]) - strcat(buf, " "); - strcat(buf, txt); - } -} - static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - char err_txt[64]; - if (!oops_may_print()) return; @@ -651,27 +636,22 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad pr_alert("BUG: unable to handle page fault for address = %px\n", (void *)address); - err_txt[0] = 0; - - /* - * Note: length of these appended strings including the separation space and the - * zero delimiter must fit into err_txt[]. - */ - err_str_append(error_code, err_txt, X86_PF_PROT, "[PROT]" ); - err_str_append(error_code, err_txt, X86_PF_WRITE, "[WRITE]"); - err_str_append(error_code, err_txt, X86_PF_USER, "[USER]" ); - err_str_append(error_code, err_txt, X86_PF_RSVD, "[RSVD]" ); - err_str_append(error_code, err_txt, X86_PF_INSTR, "[INSTR]"); - err_str_append(error_code, err_txt, X86_PF_PK, "[PK]" ); - - pr_alert("#PF error: %s\n", error_code ? err_txt : "[normal kernel read fault]"); + pr_alert("#PF: %s-privileged %s from %s code\n", + (error_code & X86_PF_USER) ? "user" : "supervisor", + (error_code & X86_PF_INSTR) ? "instruction fetch" : + (error_code & X86_PF_WRITE) ? "write access" : + "read access", + user_mode(regs) ? "user" : "kernel"); + pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code, + !(error_code & X86_PF_PROT) ? "not-present page" : + (error_code & X86_PF_RSVD) ? "reserved bit violation" : + (error_code & X86_PF_PK) ? "protection keys violation" : + "permissions violation"); if (!(error_code & X86_PF_USER) && user_mode(regs)) { struct desc_ptr idt, gdt; u16 ldtr, tr; - pr_alert("This was a system access from user code\n"); - /* * This can happen for quite a few reasons. The more obvious * ones are faults accessing the GDT, or LDT. 
Perhaps -- GitLab From b40fabc05ea047f6af5933d26a5483873340b0d4 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 19 Apr 2019 10:31:27 +0800 Subject: [PATCH 1388/1861] block: kill all_q_node in request_queue all_q_node has not been used since commit 4b855ad37194 ("blk-mq: Create hctx for each present CPU"), so remove it. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Signed-off-by: Hou Tao Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c58a3b2bf003..317ab30d29046 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -548,7 +548,6 @@ struct request_queue { struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; struct percpu_ref q_usage_counter; - struct list_head all_q_node; struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; -- GitLab From 6bedf00e55e5dd0a4ed1ad3f06131edd6fb56ec8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 17 Apr 2019 09:11:26 +0800 Subject: [PATCH 1389/1861] block: make sure that bvec length can't be overflow bvec->bv_offset may be bigger than PAGE_SIZE sometimes, such as when one bio is split in the middle of one bvec via bio_split(), and bi_iter.bi_bvec_done is used to build the offset of the 1st bvec of the remaining bio. And the remaining bio's bvec may be re-submitted to the fs layer via ITER_BVEC, such as loop and nvme-loop. So we have to make sure that every bvec's offset is less than PAGE_SIZE from bio_for_each_segment_all() because some drivers (loop, nvme-loop) pass the split bvec to the fs layer via ITER_BVEC. This patch fixes this issue, reported by Zhang Yi when running nvme/011. Cc: Christoph Hellwig Cc: Yi Zhang Reported-by: Yi Zhang Reviewed-by: Christoph Hellwig Fixes: 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 3bc91879e1e2b..ff13cbc1887db 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -160,8 +160,9 @@ static inline void bvec_advance(const struct bio_vec *bvec, bv->bv_page = nth_page(bv->bv_page, 1); bv->bv_offset = 0; } else { - bv->bv_page = bvec->bv_page; - bv->bv_offset = bvec->bv_offset; + bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset / + PAGE_SIZE); + bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, bvec->bv_len - iter_all->done); -- GitLab From 8fea0f59e97df3b9b8d2a76af54f633f4586751b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 19 Dec 2018 16:16:47 +0100 Subject: [PATCH 1390/1861] x86/topology: Make DEBUG_HOTPLUG_CPU0 pr_info() more descriptive The DEBUG_HOTPLUG_CPU0 debug feature offlines a CPU as early as possible, allowing userspace to boot up without that CPU (so that it is possible to check for unwanted dependencies towards the offlined CPU). After doing so it emits a "CPU %u is now offline" pr_info, which is not descriptive enough of why the CPU was offlined (e.g., one might be running with a config that triggered some problem, not being aware that CONFIG_DEBUG_HOTPLUG_CPU0 is set). Add a bit more informative text to the pr_info, so that it is immediately obvious why a CPU has been offlined in early boot stages. Background: Got to scratch my head a bit while debugging a WARNING splat related to the offlining of CPU0. 
Without being aware yet of this debug option it wasn't immediately obvious why CPU0 was being offlined by the kernel. Signed-off-by: Juri Lelli Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/20181219151647.15073-1-juri.lelli@redhat.com [ Merge line-broken line. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 738bf42b0218f..be5bc2e47c71e 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -71,7 +71,7 @@ int _debug_hotplug_cpu(int cpu, int action) case 0: ret = cpu_down(cpu); if (!ret) { - pr_info("CPU %u is now offline\n", cpu); + pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu); dev->offline = true; kobject_uevent(&dev->kobj, KOBJ_OFFLINE); } else -- GitLab From b6fbbf31d15b5072250ec6ed79e415a1160e5621 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 19 Dec 2018 14:34:44 +0100 Subject: [PATCH 1391/1861] cgroup/cpuset: Update stale generate_sched_domains() comments Commit: fc560a26acce ("cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre()") removed the local list (q) that was used to perform a top-down scan of all cpusets; however, comments mentioning it were not updated. Update comments to reflect current implementation. Signed-off-by: Juri Lelli Acked-by: Tejun Heo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cgroups@vger.kernel.org Cc: lizefan@huawei.com Link: http://lkml.kernel.org/r/20181219133445.31982-1-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- kernel/cgroup/cpuset.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4834c4214e9cd..6a1942ed781c5 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -740,11 +740,10 @@ static inline int nr_cpusets(void) * Must be called with cpuset_mutex held. * * The three key local variables below are: - * q - a linked-list queue of cpuset pointers, used to implement a - * top-down scan of all cpusets. This scan loads a pointer - * to each cpuset marked is_sched_load_balance into the - * array 'csa'. For our purposes, rebuilding the schedulers - * sched domains, we can ignore !is_sched_load_balance cpusets. + * cp - cpuset pointer, used (together with pos_css) to perform a + * top-down scan of all cpusets. For our purposes, rebuilding + * the schedulers sched domains, we can ignore !is_sched_load_ + * balance cpusets. 
* csa - (for CpuSet Array) Array of pointers to all the cpusets * that need to be load balanced, for convenient iterative * access by the subsequent code that finds the best partition, @@ -775,7 +774,7 @@ static inline int nr_cpusets(void) static int generate_sched_domains(cpumask_var_t **domains, struct sched_domain_attr **attributes) { - struct cpuset *cp; /* scans q */ + struct cpuset *cp; /* top-down scan of cpusets */ struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ -- GitLab From cb0c04143b6196f4a479ba113706329fc667ee15 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 19 Dec 2018 14:34:45 +0100 Subject: [PATCH 1392/1861] sched/topology: Update init_sched_domains() comment Holding the hotplug lock is not a requirement anymore for callers of sched_init_domains() after commit: 6acce3ef8452 ("sched: Remove get_online_cpus() usage") Update the corresponding comment preceding init_sched_domains(). Signed-off-by: Juri Lelli Acked-by: Tejun Heo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cgroups@vger.kernel.org Cc: lizefan@huawei.com Link: http://lkml.kernel.org/r/20181219133445.31982-2-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/topology.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index c65b31e9458be..f53f89df837d8 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2081,9 +2081,8 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) } /* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. - * For now this just excludes isolated CPUs, but could be used to - * exclude other special cases in the future. + * Set up scheduler domains and groups. For now this just excludes isolated + * CPUs, but could be used to exclude other special cases in the future. */ int sched_init_domains(const struct cpumask *cpu_map) { -- GitLab From f36e7495dd3990d6848e6d6703c78f1f17a97538 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Thu, 29 Nov 2018 16:56:15 +0100 Subject: [PATCH 1393/1861] x86/tools/relocs: Fix big section header tables In the case when the number of entries in the section header table is larger than or equal to SHN_LORESERVE, the size of the table is held in the sh_size member of the initial entry in the section header table instead of e_shnum. Same with the string table index, which is located in sh_link instead of e_shstrndx. This case is easily reproducible with KCFLAGS="-ffunction-sections": the bzImage build fails with a "String table index out of bounds" error. Signed-off-by: Artem Savkov Reviewed-by: Josh Poimboeuf Acked-by: Joe Lawrence Cc: Eric W. Biederman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181129155615.2594-1-asavkov@redhat.com [ Simplify the die() lines. 
] Signed-off-by: Ingo Molnar --- arch/x86/tools/relocs.c | 74 +++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index b629f6992d9f6..f345586f5e50e 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -11,7 +11,9 @@ #define Elf_Shdr ElfW(Shdr) #define Elf_Sym ElfW(Sym) -static Elf_Ehdr ehdr; +static Elf_Ehdr ehdr; +static unsigned long shnum; +static unsigned int shstrndx; struct relocs { uint32_t *offset; @@ -241,9 +243,9 @@ static const char *sec_name(unsigned shndx) { const char *sec_strtab; const char *name; - sec_strtab = secs[ehdr.e_shstrndx].strtab; + sec_strtab = secs[shstrndx].strtab; name = ""; - if (shndx < ehdr.e_shnum) { + if (shndx < shnum) { name = sec_strtab + secs[shndx].shdr.sh_name; } else if (shndx == SHN_ABS) { @@ -271,7 +273,7 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) static Elf_Sym *sym_lookup(const char *symname) { int i; - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; long nsyms; char *strtab; @@ -366,27 +368,41 @@ static void read_ehdr(FILE *fp) ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum); ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx); - if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + shnum = ehdr.e_shnum; + shstrndx = ehdr.e_shstrndx; + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) die("Unsupported ELF header type\n"); - } - if (ehdr.e_machine != ELF_MACHINE) { + if (ehdr.e_machine != ELF_MACHINE) die("Not for %s\n", ELF_MACHINE_NAME); - } - if (ehdr.e_version != EV_CURRENT) { + if (ehdr.e_version != EV_CURRENT) die("Unknown ELF version\n"); - } - if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) { + if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) die("Bad Elf header size\n"); - } - if (ehdr.e_phentsize != sizeof(Elf_Phdr)) { + if (ehdr.e_phentsize != sizeof(Elf_Phdr)) die("Bad program header entry\n"); - } - if (ehdr.e_shentsize != sizeof(Elf_Shdr)) { + if (ehdr.e_shentsize != sizeof(Elf_Shdr)) die("Bad section header entry\n"); + + + if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) { + Elf_Shdr shdr; + + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); + + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read initial ELF section header: %s\n", strerror(errno)); + + if (shnum == SHN_UNDEF) + shnum = elf_xword_to_cpu(shdr.sh_size); + + if (shstrndx == SHN_XINDEX) + shstrndx = elf_word_to_cpu(shdr.sh_link); } - if (ehdr.e_shstrndx >= ehdr.e_shnum) { + + if (shstrndx >= shnum) die("String table index out of bounds\n"); - } } static void read_shdrs(FILE *fp) @@ -394,20 +410,20 @@ static void read_shdrs(FILE *fp) int i; Elf_Shdr shdr; - secs = calloc(ehdr.e_shnum, sizeof(struct section)); + secs = calloc(shnum, sizeof(struct section)); if (!secs) { die("Unable to allocate %d section headers\n", - ehdr.e_shnum); + shnum); } if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); } - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; if (fread(&shdr, sizeof(shdr), 1, fp) != 1) die("Cannot read ELF section headers %d/%d: %s\n", - i, ehdr.e_shnum, strerror(errno)); + i, shnum, strerror(errno)); sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type); sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags); @@ -418,7 +434,7 @@ static void 
read_shdrs(FILE *fp) sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info); sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign); sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize); - if (sec->shdr.sh_link < ehdr.e_shnum) + if (sec->shdr.sh_link < shnum) sec->link = &secs[sec->shdr.sh_link]; } @@ -427,7 +443,7 @@ static void read_shdrs(FILE *fp) static void read_strtabs(FILE *fp) { int i; - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_STRTAB) { continue; } @@ -452,7 +468,7 @@ static void read_strtabs(FILE *fp) static void read_symtabs(FILE *fp) { int i,j; - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } @@ -485,7 +501,7 @@ static void read_symtabs(FILE *fp) static void read_relocs(FILE *fp) { int i,j; - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_REL_TYPE) { continue; } @@ -528,7 +544,7 @@ static void read_relocs(FILE *fp) static void print_absolute_symbols(void) printf("Absolute symbols\n"); printf(" Num: Value Size Type Bind Visibility Name\n"); - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; int j; @@ -566,7 +582,7 @@ static void print_absolute_relocs(void) else format = "%08"PRIx32" %08"PRIx32" %10s %08"PRIx32" %s\n"; - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; struct section *sec_applies, *sec_symtab; char *sym_strtab; @@ -650,7 +666,7 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, { int i; /* Walk through the relocations */ - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { char *sym_strtab; Elf_Sym *sh_symtab; struct section *sec_applies, *sec_symtab; @@ -706,7 +722,7 @@ static Elf_Addr per_cpu_load_addr; static void percpu_init(void) { int i; - for (i = 0; i < ehdr.e_shnum; i++) { + for (i = 0; i < shnum; i++) { ElfW(Sym) *sym; if (strcmp(sec_name(i), ".data..percpu")) continue; -- GitLab From 36ad7022536e0c65f8baeeaa5efde11dec44808a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Wed, 17 Apr 2019 22:09:12 +0200 Subject: [PATCH 1394/1861] of_net: Fix residues after of_get_nvmem_mac_address removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've discovered the following discrepancy in the bindings/net/ethernet.txt documentation, where it states the following: - nvmem-cells: phandle, reference to an nvmem node for the MAC address; - nvmem-cell-names: string, should be "mac-address" if nvmem is to be.. which is actually misleading and confusing. There are only two ethernet drivers in the tree, cadence/macb and davinci, which support these properties. These nvmem-cell* properties were introduced in commit 9217e566bdee ("of_net: Implement of_get_nvmem_mac_address helper"), but commit afa64a72b862 ("of: net: kill of_get_nvmem_mac_address()") forgot to properly clean up these parts. So this patch fixes the documentation by moving the nvmem-cell* properties to the appropriate places. While at it, I've removed an unused include as well. Cc: Bartosz Golaszewski Fixes: afa64a72b862 ("of: net: kill of_get_nvmem_mac_address()") Signed-off-by: Petr Štetiar Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/davinci_emac.txt | 2 ++ Documentation/devicetree/bindings/net/ethernet.txt | 2 -- Documentation/devicetree/bindings/net/macb.txt | 4 ++++ drivers/of/of_net.c | 1 - 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index 24c5cdaba8d27..ca83dcc84fb8e 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -20,6 +20,8 @@ Required properties: Optional properties: - phy-handle: See ethernet.txt file in the same directory. If absent, davinci_emac driver defaults to 100/FULL. +- nvmem-cells: phandle, reference to an nvmem node for the MAC address +- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used - ti,davinci-rmii-en: 1 byte, 1 means use RMII - ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM? diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index cfc376bc977aa..2974e63ba311a 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -10,8 +10,6 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt. the boot program; should be used in cases where the MAC address assigned to the device by the boot program is different from the "local-mac-address" property; -- nvmem-cells: phandle, reference to an nvmem node for the MAC address; -- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used; - max-speed: number, specifies maximum speed in Mbit/s supported by the device; - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than the maximum frame size (there's contradiction in the Devicetree diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 174f292d8a3e8..8b80515729d71 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -26,6 +26,10 @@ Required properties: Optional elements: 'tsu_clk' - clocks: Phandles to input clocks. 
+Optional properties: +- nvmem-cells: phandle, reference to an nvmem node for the MAC address +- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used + Optional properties for PHY child node: - reset-gpios : Should specify the gpio for phy reset - magic-packet : If present, indicates that the hardware supports waking diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c index 810ab0fbcccbf..d820f3edd4311 100644 --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c @@ -7,7 +7,6 @@ */ #include #include -#include #include #include #include -- GitLab From ad2e379def135ebc079f89a0e0b1d987d243f949 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Nov 2018 15:23:50 +0000 Subject: [PATCH 1395/1861] sched/debug: Fix spelling mistake "logaritmic" -> "logarithmic" Signed-off-by: Colin Ian King Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20181128152350.13622-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 8039d62ae36e6..678bfb9bd87f7 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -702,7 +702,7 @@ do { \ static const char *sched_tunable_scaling_names[] = { "none", - "logaritmic", + "logarithmic", "linear" }; -- GitLab From c2b71462d294cf517a0bc6e4fd6424d7cee5596f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 19 Apr 2019 13:52:38 -0400 Subject: [PATCH 1396/1861] USB: core: Fix bug caused by duplicate interface PM usage counter The syzkaller fuzzer reported a bug in the USB hub driver which turned out to be caused by a negative runtime-PM usage counter. This allowed a hub to be runtime suspended at a time when the driver did not expect it. The symptom is a WARNING issued because the hub's status URB is submitted while it is already active: URB 0000000031fb463e submitted while active WARNING: CPU: 0 PID: 2917 at drivers/usb/core/urb.c:363 The negative runtime-PM usage count was caused by an unfortunate design decision made when runtime PM was first implemented for USB. At that time, USB class drivers were allowed to unbind from their interfaces without balancing the usage counter (i.e., leaving it with a positive count). The core code would take care of setting the counter back to 0 before allowing another driver to bind to the interface. Later on when runtime PM was implemented for the entire kernel, the opposite decision was made: Drivers were required to balance their runtime-PM get and put calls. In order to maintain backward compatibility, however, the USB subsystem adapted to the new implementation by keeping an independent usage counter for each interface and using it to automatically adjust the normal usage counter back to 0 whenever a driver was unbound. This approach involves duplicating information, but what is worse, it doesn't work properly in cases where a USB class driver delays decrementing the usage counter until after the driver's disconnect() routine has returned and the counter has been adjusted back to 0. Doing so would cause the usage counter to become negative. There's even a warning about this in the USB power management documentation! As it happens, this is exactly what the hub driver does. The kick_hub_wq() routine increments the runtime-PM usage counter, and the corresponding decrement is carried out by hub_event() in the context of the hub_wq work-queue thread. 
This work routine may sometimes run after the driver has been unbound from its interface, and when it does it causes the usage counter to go negative. It is not possible for hub_disconnect() to wait for a pending hub_event() call to finish, because hub_disconnect() is called with the device lock held and hub_event() acquires that lock. The only feasible fix is to reverse the original design decision: remove the duplicate interface-specific usage counter and require USB drivers to balance their runtime PM gets and puts. As far as I know, all existing drivers currently do this. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+7634edaea4d0b341c625@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/power-management.rst | 14 +++++++++----- drivers/usb/core/driver.c | 13 ------------- drivers/usb/storage/realtek_cr.c | 13 +++++-------- include/linux/usb.h | 2 -- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst index 79beb807996b7..4a74cf6f27972 100644 --- a/Documentation/driver-api/usb/power-management.rst +++ b/Documentation/driver-api/usb/power-management.rst @@ -370,11 +370,15 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -Drivers need not be concerned about balancing changes to the usage -counter; the USB core will undo any remaining "get"s when a driver -is unbound from its interface. As a corollary, drivers must not call -any of the ``usb_autopm_*`` functions after their ``disconnect`` -routine has returned. +Drivers must be careful to balance their overall changes to the usage +counter. Unbalanced "get"s will remain in effect when a driver is +unbound from its interface, preventing the device from going into +runtime suspend should the interface be bound to a driver again. On +the other hand, drivers are allowed to achieve this balance by calling +the ``usb_autopm_*`` functions even after their ``disconnect`` routine +has returned -- say from within a work-queue routine -- provided they +retain an active reference to the interface (via ``usb_get_intf`` and +``usb_put_intf``). Drivers using the async routines are responsible for their own synchronization and mutual exclusion. 
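A minimal sketch of the discipline the updated documentation asks for, in a hypothetical driver (the my_* names are placeholders, not a real consumer of this API):

    static void my_submit(struct my_dev *mydev)
    {
            usb_get_intf(mydev->intf);      /* pin the interface */
            usb_autopm_get_interface_no_resume(mydev->intf);
            schedule_work(&mydev->finish_work);
    }

    /*
     * May run after disconnect() has returned; that is allowed as long
     * as the interface reference taken above is still held.
     */
    static void my_finish(struct work_struct *work)
    {
            struct my_dev *mydev = container_of(work, struct my_dev,
                                                finish_work);

            usb_autopm_put_interface_no_suspend(mydev->intf); /* balance */
            usb_put_intf(mydev->intf);                        /* unpin */
    }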
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 8987cec9549dd..ebcadaad89d1d 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -473,11 +473,6 @@ static int usb_unbind_interface(struct device *dev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); - /* Undo any residual pm_autopm_get_interface_* calls */ - for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r) - usb_autopm_put_interface_no_suspend(intf); - atomic_set(&intf->pm_usage_cnt, 0); - if (!error) usb_autosuspend_device(udev); @@ -1633,7 +1628,6 @@ void usb_autopm_put_interface(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1662,7 +1656,6 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1684,7 +1677,6 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); @@ -1715,8 +1707,6 @@ int usb_autopm_get_interface(struct usb_interface *intf) status = pm_runtime_get_sync(&intf->dev); if (status < 0) pm_runtime_put_sync(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1750,8 +1740,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf) status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1775,7 +1763,6 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 31b0244419387..cc794e25a0b6e 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -763,18 +763,16 @@ static void rts51x_suspend_timer_fn(struct timer_list *t) break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: - usb_stor_dbg(us, "RTS51X_STAT_SS, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) > 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); - usb_stor_dbg(us, "RTS51X_STAT_SS 01, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; @@ 
-807,11 +805,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) int ret; if (working_scsi(srb)) { - usb_stor_dbg(us, "working scsi, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) <= 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } diff --git a/include/linux/usb.h b/include/linux/usb.h index 5e49e82c43684..ff010d1fd1c78 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -200,7 +200,6 @@ usb_find_last_int_out_endpoint(struct usb_host_interface *alt, * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. - * @pm_usage_cnt: PM usage counter for this interface * @reset_ws: Used for scheduling resets from atomic context. * @resetting_device: USB core reset the device, so use alt setting 0 as * current; needs bandwidth alloc after reset. @@ -257,7 +256,6 @@ struct usb_interface { struct device dev; /* interface specific device info */ struct device *usb_dev; - atomic_t pm_usage_cnt; /* usage counter for autosuspend */ struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) -- GitLab From b97369f07e3b826e8da0436e26b01c1f8e36042d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonardo=20Br=C3=A1s?= Date: Mon, 22 Oct 2018 22:10:22 -0300 Subject: [PATCH 1397/1861] x86/vdso: Rename variable to fix -Wshadow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The go32() and go64() functions have an argument and a local variable called ‘name’. Rename both to clarify the code and to fix a warning with -Wshadow.
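To illustrate what -Wshadow flags here (a contrived snippet, not the vdso2c.h code; NSYMS and sym_name() are made-up placeholders):

    static void emit(FILE *outfile, const char *name)
    {
            int i;

            for (i = 0; i < NSYMS; i++) {
                    /* warning: declaration of 'name' shadows a parameter */
                    const char *name = sym_name(i);

                    fprintf(outfile, "%s\n", name);
            }
    }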
Signed-off-by: Leonardo Brás Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: David.Laight@aculab.com Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Michal Marek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: helen@koikeco.de Cc: linux-kbuild@vger.kernel.org Cc: lkcamp@lists.libreplanetbr.org Link: http://lkml.kernel.org/r/20181023011022.GA6574@WindFlash Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vdso2c.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index fa847a620f40f..a20b134de2a89 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -7,7 +7,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, void *stripped_addr, size_t stripped_len, - FILE *outfile, const char *name) + FILE *outfile, const char *image_name) { int found_load = 0; unsigned long load_size = -1; /* Work around bogus warning */ @@ -93,11 +93,12 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, int k; ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) + GET_LE(&symtab_hdr->sh_entsize) * i; - const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) + - GET_LE(&sym->st_name); + const char *sym_name = raw_addr + + GET_LE(&strtab_hdr->sh_offset) + + GET_LE(&sym->st_name); for (k = 0; k < NSYMS; k++) { - if (!strcmp(name, required_syms[k].name)) { + if (!strcmp(sym_name, required_syms[k].name)) { if (syms[k]) { fail("duplicate symbol %s\n", required_syms[k].name); @@ -134,7 +135,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, if (syms[sym_vvar_start] % 4096) fail("vvar_begin must be a multiple of 4096\n"); - if (!name) { + if (!image_name) { fwrite(stripped_addr, stripped_len, 1, outfile); return; } @@ -157,7 +158,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, } fprintf(outfile, "\n};\n\n"); - fprintf(outfile, "const struct vdso_image %s = {\n", name); + fprintf(outfile, "const struct vdso_image %s = {\n", image_name); fprintf(outfile, "\t.data = raw_data,\n"); fprintf(outfile, "\t.size = %lu,\n", mapping_size); if (alt_sec) { -- GitLab From 12fc512f5741443a03adde2ead20724da8ad550a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 15 Mar 2019 16:41:43 +0200 Subject: [PATCH 1398/1861] net/mlx5e: Fix use-after-free after xdp_return_frame xdp_return_frame releases the frame. It leads to releasing the page, so it's not allowed to access xdpi.xdpf->len after that, because xdpi.xdpf is at xdp->data_hard_start after convert_to_xdp_frame. This patch moves the memory access to precede the return of the frame. 
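Reduced to its essence, the fix is an ordering rule (condensed from the hunks below; sq, xdpi and the DMA details are as in the driver):

    /* Buggy: the frame backing xdpi.xdpf is released first, so the
     * xdpi.xdpf->len read that follows is a use-after-free.
     */
    xdp_return_frame(xdpi.xdpf);
    dma_unmap_single(sq->pdev, xdpi.dma_addr,
                     xdpi.xdpf->len, DMA_TO_DEVICE);  /* UAF */

    /* Fixed: read everything needed from the frame, then release it. */
    dma_unmap_single(sq->pdev, xdpi.dma_addr,
                     xdpi.xdpf->len, DMA_TO_DEVICE);
    xdp_return_frame(xdpi.xdpf);  /* nothing may touch xdpf afterwards */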
Fixes: 58b99ee3e3ebe ("net/mlx5e: Add support for XDP_REDIRECT in device-out side") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 03b2a9f9c5895..10a99cd3e5987 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -304,9 +304,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) mlx5e_xdpi_fifo_pop(xdpi_fifo); if (is_redirect) { - xdp_return_frame(xdpi.xdpf); dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); } else { /* Recycle RX page */ mlx5e_page_release(rq, &xdpi.di, true); @@ -345,9 +345,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) mlx5e_xdpi_fifo_pop(xdpi_fifo); if (is_redirect) { - xdp_return_frame(xdpi.xdpf); dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); } else { /* Recycle RX page */ mlx5e_page_release(rq, &xdpi.di, false); -- GitLab From d460c2718906252a2a69bc6f89b537071f792e6e Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 8 Apr 2019 15:12:45 +0300 Subject: [PATCH 1399/1861] net/mlx5e: Fix the max MTU check in case of XDP MLX5E_XDP_MAX_MTU was calculated incorrectly. It didn't account for NET_IP_ALIGN and MLX5E_HW2SW_MTU, and it also misused MLX5_SKB_FRAG_SZ. This commit fixes the calculations and adds a brief explanation for the formula used. Fixes: a26a5bdf3ee2d ("net/mlx5e: Restrict the combination of large MTU and XDP") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 20 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/xdp.h | 3 +-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 10a99cd3e5987..cad34d6f5f451 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -33,6 +33,26 @@ #include #include "en/xdp.h" +int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +{ + int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. 
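+ * As a purely illustrative instance (the concrete values are
+ * arch-dependent and only assumed here): with PAGE_SIZE = 4096,
+ * NET_IP_ALIGN = 0, XDP_PACKET_HEADROOM = 256 and S = 320,
+ * formula (2) gives max_mtu = 4096 - 320 - 256 - hard_mtu.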
+ */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index ee27a7c8cd87d..553956cadc8a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -34,13 +34,12 @@ #include "en.h" -#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \ - MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM))) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len); bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f7eb521db5800..46157e2a1e5ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3777,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->xdp_prog && !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, MLX5E_XDP_MAX_MTU); + new_mtu, mlx5e_xdp_max_mtu(params)); err = -EINVAL; goto out; } @@ -4212,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU); + new_channels.params.sw_mtu, + mlx5e_xdp_max_mtu(&new_channels.params)); return -EINVAL; } -- GitLab From ace329f4ab3ba434be2adf618073c752d083b524 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Thu, 11 Apr 2019 10:41:03 +0300 Subject: [PATCH 1400/1861] net/mlx5e: ethtool, Remove unsupported SFP EEPROM high pages query Querying EEPROM high pages data for SFP modules is currently not supported by our driver, yet such queries are issued anyway, resulting in invalid FW queries. Setting the EEPROM ethtool data length to 256 for SFP modules will limit the reading to page 0 only and prevent invalid FW queries.
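The general shape of such a fix, as a hedged sketch of a driver's get_module_info() callback (not the mlx5e code itself; MY_EEPROM_PAGE_LEN is an assumed constant):

    #define MY_EEPROM_PAGE_LEN 256  /* page 0 only */

    static int my_get_module_info(struct net_device *dev,
                                  struct ethtool_modinfo *modinfo)
    {
            modinfo->type = ETH_MODULE_SFF_8472;
            /* Reporting a shorter length stops the ethtool core from
             * ever requesting the unsupported high pages.
             */
            modinfo->eeprom_len = MY_EEPROM_PAGE_LEN;
            return 0;
    }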
Fixes: bb64143eee8c ("net/mlx5e: Add ethtool support for dump module EEPROM") Signed-off-by: Erez Alfasi Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 76a3d01a489e0..78dc8fe2a83c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1586,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 21b7f05b16a5f..361468e0435dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -317,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; i2c_addr = MLX5_I2C_ADDR_LOW; - if (offset >= MLX5_EEPROM_PAGE_LENGTH) { - i2c_addr = MLX5_I2C_ADDR_HIGH; - offset -= MLX5_EEPROM_PAGE_LENGTH; - } MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); -- GitLab From 30c04d796b693e22405c38e9b78e9a364e4c77e6 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 18 Apr 2019 19:57:25 +0800 Subject: [PATCH 1401/1861] selftests/net: correct the return value for run_netsocktests The run_netsocktests will be marked as passed regardless of the actual test result from ./socket: selftests: net: run_netsocktests ======================================== -------------------- running socket test -------------------- [FAIL] ok 1..6 selftests: net: run_netsocktests [PASS] This is because the test script itself has been successfully executed. Fix this by exiting with 1 when the test fails. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/run_netsocktests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index b093f39c298c3..14e41faf2c574 100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests @@ -7,7 +7,7 @@ echo "--------------------" ./socket if [ $? -ne 0 ]; then echo "[FAIL]" + exit 1 else echo "[PASS]" fi - -- GitLab From 925b0c841e066b488cc3a60272472b2c56300704 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2019 14:31:00 +0800 Subject: [PATCH 1402/1861] team: fix possible recursive locking when adding slaves If we add a bond device which is already the master of the team interface, we will hold the team->lock in team_add_slave() first and then request the lock in team_set_mac_address() again. The functions are called like: - team_add_slave() - team_port_add() - team_port_enter() - team_modeop_port_enter() - __set_port_dev_addr() - dev_set_mac_address() - bond_set_mac_address() - dev_set_mac_address() - team_set_mac_address Although team_upper_dev_link() would check the upper devices, it is called too late. Fix it by adding a check before processing the slave.
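In sketch form, the fix rejects such a port up front, before any of the team->lock-protected setup can recurse (condensed from the team_port_add() hunk below):

    if (netdev_has_upper_dev(dev, port_dev)) {
            NL_SET_ERR_MSG(extack,
                           "Device is already an upper device of the team interface");
            return -EBUSY;
    }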
v2: Do not split the string in netdev_err() Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Acked-by: Jiri Pirko Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/team/team.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 9ce61b019aadb..16963f7a88f74 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1156,6 +1156,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return -EINVAL; } + if (netdev_has_upper_dev(dev, port_dev)) { + NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface"); + netdev_err(dev, "Device %s is already an upper device of the team interface\n", + portname); + return -EBUSY; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); -- GitLab From 09cbd2197e9291d6a3d3f42873f06ca1f388c1a4 Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Thu, 18 Apr 2019 03:41:14 +0000 Subject: [PATCH 1403/1861] RAS/CEC: Increment cec_entered under the mutex lock Modify ->ces_entered in the critical section of the mutex. Signed-off-by: WANG Chao Signed-off-by: Borislav Petkov Cc: Tony Luck Cc: linux-edac Link: https://lkml.kernel.org/r/20190418034115.75954-2-chao.wang@ucloud.cn --- drivers/ras/cec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 2d9ec378a8bc3..88e4f3ff0cb84 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -286,10 +286,10 @@ int cec_add_elem(u64 pfn) if (!ce_arr.array || ce_arr.disabled) return -ENODEV; - ca->ces_entered++; - mutex_lock(&ce_mutex); + ca->ces_entered++; + if (ca->n == MAX_ELEMS) WARN_ON(!del_lru_elem_unlocked(ca)); -- GitLab From 8c03557c3f25271e62e39154af66ebdd1b59c9ca Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 19 Apr 2019 19:01:13 +0800 Subject: [PATCH 1404/1861] selftests/net: correct the return value for run_afpackettests The run_afpackettests will be marked as passed regardless of the return value of those sub-tests in the script: -------------------- running psock_tpacket test -------------------- [FAIL] selftests: run_afpackettests [PASS] Fix this by changing the return value for each test. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/run_afpackettests | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 2dc95fda7ef76..ea5938ec009a5 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then exit 0 fi +ret=0 echo "--------------------" echo "running psock_fanout test" echo "--------------------" ./in_netns.sh ./psock_fanout if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -22,6 +24,7 @@ echo "--------------------" ./in_netns.sh ./psock_tpacket if [ $? -ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi @@ -32,6 +35,8 @@ echo "--------------------" ./in_netns.sh ./txring_overwrite if [ $?
-ne 0 ]; then echo "[FAIL]" + ret=1 else echo "[PASS]" fi +exit $ret -- GitLab From 62ef81d5632634d5e310ed25b9b940b2b6612b46 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2019 16:51:38 -0700 Subject: [PATCH 1405/1861] net/tls: avoid potential deadlock in tls_set_device_offload_rx() If the device supports offload, but the offload fails, tls_set_device_offload_rx() will call tls_sw_free_resources_rx() which (unhelpfully) releases and reacquires the socket lock. For a small fix, release and reacquire the device_offload_lock. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- net/tls/tls_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 9f3bdbc1e5934..1b5f55cac613d 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -904,7 +904,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_netdev; free_sw_resources: + up_read(&device_offload_lock); tls_sw_free_resources_rx(sk); + down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; release_netdev: -- GitLab From 12c7686111326148b4b5db189130522a4ad1be4a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2019 16:52:19 -0700 Subject: [PATCH 1406/1861] net/tls: don't leak IV and record seq when offload fails When the device refuses the offload in tls_set_device_offload_rx(), it calls tls_sw_free_resources_rx() to clean up software context state. Unfortunately, tls_sw_free_resources_rx() does not free all the state tls_set_sw_offload() allocated - it leaks IV and sequence number buffers. All other code paths which lead to tls_sw_release_resources_rx() (which tls_sw_free_resources_rx() calls) free those right before the call. Avoid the leak by moving freeing of iv and rec_seq into tls_sw_release_resources_rx(). Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S.
Miller --- net/tls/tls_device.c | 2 -- net/tls/tls_main.c | 5 +---- net/tls/tls_sw.c | 3 +++ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 1b5f55cac613d..cc0256939eb63 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -941,8 +941,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk) } out: up_read(&device_offload_lock); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); tls_sw_release_resources_rx(sk); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9547cea0ce3b0..478603f43964d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -293,11 +293,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) #endif } - if (ctx->rx_conf == TLS_SW) { - kfree(ctx->rx.rec_seq); - kfree(ctx->rx.iv); + if (ctx->rx_conf == TLS_SW) tls_sw_free_resources_rx(sk); - } #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index b50ced862f6f9..29d6af43dd249 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2078,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + kfree(tls_ctx->rx.rec_seq); + kfree(tls_ctx->rx.iv); + if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; -- GitLab From 085b7755808aa11f78ab9377257e1dad2e6fa4bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Apr 2019 10:45:57 -0700 Subject: [PATCH 1407/1861] Linux 5.1-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71fd5c2ce0675..abe13538a8c04 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shy Crocodile # *DOCUMENTATION* -- GitLab From 39420fe04f093c15e1674ef56dbae0df109738ec Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 20 Apr 2019 18:14:33 +0000 Subject: [PATCH 1408/1861] dt-bindings: add an explanation for internal phy-mode When working on the Allwinner internal PHY, the first approach was to use the "internal" mode, but questions were raised by mail about what "internal" really means for a PHY. This patch writes that down in the doc. Signed-off-by: Corentin Labbe Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 2974e63ba311a..a686215805846 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -16,7 +16,8 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt. Specification). - phy-mode: string, operation mode of the PHY interface. This is now a de-facto standard property; supported values are: - * "internal" + * "internal" (Internal means there is not a standard bus between the MAC and + the PHY, something proprietary is being used to embed the PHY in the MAC.)
* "mii" * "gmii" * "sgmii" -- GitLab From 26d1b8586b4fe14814ff4fd471cfc56014359e59 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 20 Apr 2019 16:43:01 +0000 Subject: [PATCH 1409/1861] Documentation: decnet: remove reference to CONFIG_DECNET_ROUTE_FWMARK CONFIG_DECNET_ROUTE_FWMARK was removed in commit 47dcf0cb1005 ("[NET]: Rethink mark field in struct flowi") Since nothing replace it (and nothindg need to replace it, simply remove it from documentation. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- Documentation/networking/decnet.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt index e12a4900cf72c..d192f8b9948b5 100644 --- a/Documentation/networking/decnet.txt +++ b/Documentation/networking/decnet.txt @@ -22,8 +22,6 @@ you'll need the following options as well... CONFIG_DECNET_ROUTER (to be able to add/delete routes) CONFIG_NETFILTER (will be required for the DECnet routing daemon) - CONFIG_DECNET_ROUTE_FWMARK is optional - Don't turn on SIOCGIFCONF support for DECnet unless you are really sure that you need it, in general you won't and it can cause ifconfig to malfunction. -- GitLab From ea2f8d60603efbd1cb4e193a593945a2fe24d264 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 21 Apr 2019 20:35:24 +0200 Subject: [PATCH 1410/1861] x86/fault: Make fault messages more succinct So we are going to be staring at those in the next years, let's make them more succinct. In particular: - change "address = " to "address: " - "-privileged" reads funny. It should be simply "kernel" or "user" - "from kernel code" reads funny too. "kernel mode" or "user mode" is more natural. An actual example says more than 1000 words, of course: [ 0.248370] BUG: kernel NULL pointer dereference, address: 00000000000005b8 [ 0.249120] #PF: supervisor write access in kernel mode [ 0.249717] #PF: error_code(0x0002) - not-present page Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave.hansen@linux.intel.com Cc: luto@kernel.org Cc: riel@surriel.com Cc: sean.j.christopherson@intel.com Cc: yu-cheng.yu@intel.com Link: http://lkml.kernel.org/r/20190421183524.GC6048@zn.tnic Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 74c9204c5751c..a0df19b0897df 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -630,13 +630,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad } if (address < PAGE_SIZE && !user_mode(regs)) - pr_alert("BUG: kernel NULL pointer dereference, address = %px\n", + pr_alert("BUG: kernel NULL pointer dereference, address: %px\n", (void *)address); else - pr_alert("BUG: unable to handle page fault for address = %px\n", + pr_alert("BUG: unable to handle page fault for address: %px\n", (void *)address); - pr_alert("#PF: %s-privileged %s from %s code\n", + pr_alert("#PF: %s %s in %s mode\n", (error_code & X86_PF_USER) ? "user" : "supervisor", (error_code & X86_PF_INSTR) ? "instruction fetch" : (error_code & X86_PF_WRITE) ? "write access" : -- GitLab From 9ca5c8e632ce8f144ec6d00da2dc5e16b41d593c Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 21 Apr 2019 11:50:59 +0800 Subject: [PATCH 1411/1861] x86/kdump: Have crashkernel=X reserve under 4G by default The kdump crashkernel low reservation is limited to under 896M even for X86_64. 
This obscure and miserable limitation exists for compatibility with old kexec-tools but the reason is not documented anywhere. Some more tests/investigations about the background: a) Previously, old kexec-tools could only load purgatory to memory under 2G. Eric removed that limitation in 2012 in kexec-tools: b4f9f8599679 ("kexec x86_64: Make purgatory relocatable anywhere in the 64bit address space.") b) Back in 2013 Yinghai removed all the limitations in new kexec-tools, bzImage64 can be loaded anywhere: 82c3dd2280d2 ("kexec, x86_64: Load bzImage64 above 4G") c) Test results with old kexec-tools with old and latest kernels: 1. Old kexec-tools can not build with modern toolchain anymore, I built it in a RHEL6 vm. 2. 2.0.0 kexec-tools does not work with the latest kernel even with memory under 896M and gives an error: "ELF core (kcore) parse failed" For that it needs below kexec-tools fix: ed15ba1b9977 ("build_mem_phdrs(): check if p_paddr is invalid") 3. Even with patched kexec-tools which fixes 2), it still needs some other fixes to work correctly for KASLR-enabled kernels. So the situation is: * Old kexec-tools is already broken with latest kernels. * We can not keep these limitations forever just for compatibility with very old kexec-tools. * If one must use old tools then he/she can choose crashkernel=X@Y. * People have reported bugs where crashkernel=384M failed because KASLR makes the 0-896M space sparse. * Crashkernel can reserve in low or high area, it is natural to understand low as memory under 4G. Hence drop the 896M limitation and change crashkernel low reservation to reserve under 4G by default. Signed-off-by: Dave Young Signed-off-by: Borislav Petkov Reviewed-by: Ingo Molnar Acked-by: Baoquan He Cc: Dave Hansen Cc: David Howells Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: Petr Tesarik Cc: piliu@redhat.com Cc: Ram Pai Cc: Sinan Kaya Cc: Thomas Gleixner Cc: vgoyal@redhat.com Cc: x86-ml Cc: Yinghai Lu Cc: Zhimin Gu Link: https://lkml.kernel.org/r/20190421035058.943630505@redhat.com --- arch/x86/kernel/setup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3d872a527cd96..daf7c5650c18c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include

-fru_id: -fru_text: -section_type:
-
- -* := recoverable | fatal | corrected | info - -
# := -[primary][, containment warning][, reset][, threshold exceeded]\ -[, resource not accessible][, latent error] - -
:= generic processor error | memory error | \ -PCIe error | unknown, - -
:= - | | \ - | - - := -[processor_type: , ] -[processor_isa: , ] -[error_type: -] -[operation: , ] -[flags: -] -[level: ] -[version_info: ] -[processor_id: ] -[target_address: ] -[requestor_id: ] -[responder_id: ] -[IP: ] - -* := IA32/X64 | IA64 - -* := IA32 | IA64 | X64 - -# := -[cache error][, TLB error][, bus error][, micro-architectural error] - -* := unknown or generic | data read | data write | \ -instruction execution - -# := -[restartable][, precise IP][, overflow][, corrected] - - := -[error_status: ] -[physical_address: ] -[physical_address_mask: ] -[node: ] -[card: ] -[module: ] -[bank: ] -[device: ] -[row: ] -[column: ] -[bit_position: ] -[requestor_id: ] -[responder_id: ] -[target_id: ] -[error_type: , ] - -* := -unknown | no error | single-bit ECC | multi-bit ECC | \ -single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \ -target abort | parity error | watchdog timeout | invalid address | \ -mirror Broken | memory sparing | scrub corrected error | \ -scrub uncorrected error - - := -[port_type: , ] -[version: .] -[command: , status: ] -[device_id: ::. -slot: -secondary_bus: -vendor_id: , device_id: -class_code: ] -[serial number: , ] -[bridge: secondary_status: , control: ] -[aer_status: , aer_mask: - -[aer_uncor_severity: ] -aer_layer=, aer_agent= -aer_tlp_header: ] - -* := PCIe end point | legacy PCI end point | \ -unknown | unknown | root port | upstream switch port | \ -downstream switch port | PCIe to PCI/PCI-X bridge | \ -PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \ -root complex event collector - -if section severity is fatal or recoverable -# := -unknown | unknown | unknown | unknown | Data Link Protocol | \ -unknown | unknown | unknown | unknown | unknown | unknown | unknown | \ -Poisoned TLP | Flow Control Protocol | Completion Timeout | \ -Completer Abort | Unexpected Completion | Receiver Overflow | \ -Malformed TLP | ECRC | Unsupported Request -else -# := -Receiver Error | unknown | unknown | unknown | unknown | unknown | \ -Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \ -Replay Timer Timeout | Advisory Non-Fatal -fi - - := -Physical Layer | Data Link Layer | Transaction Layer - - := -Receiver ID | Requester ID | Completer ID | Transmitter ID - -Where, [] designate corresponding content is optional - -All description with * has the following format: - -field: , - -Where value of should be the position of "string" in description. Otherwise, will be "unknown". - -All description with # has the following format: - -field: - - -Where each string in corresponding to one set bit of -. The bit position is the position of "string" in description. - -For more detailed explanation of every field, please refer to UEFI -specification version 2.3 or later, section Appendix N: Common -Platform Error Record. diff --git a/Documentation/firmware-guide/acpi/apei/output_format.rst b/Documentation/firmware-guide/acpi/apei/output_format.rst new file mode 100644 index 0000000000000..c2e7ebddb5298 --- /dev/null +++ b/Documentation/firmware-guide/acpi/apei/output_format.rst @@ -0,0 +1,150 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +APEI output format +================== + +APEI uses printk as hardware error reporting interface, the output +format is as follow:: + + := + APEI generic hardware error status + severity: , + section: , severity: , + flags: +
+ fru_id: + fru_text: + section_type:
+
+ + * := recoverable | fatal | corrected | info + +
# := + [primary][, containment warning][, reset][, threshold exceeded]\ + [, resource not accessible][, latent error] + +
:= generic processor error | memory error | \ + PCIe error | unknown, + +
:= + | | \ + | + + := + [processor_type: , ] + [processor_isa: , ] + [error_type: + ] + [operation: , ] + [flags: + ] + [level: ] + [version_info: ] + [processor_id: ] + [target_address: ] + [requestor_id: ] + [responder_id: ] + [IP: ] + + * := IA32/X64 | IA64 + + * := IA32 | IA64 | X64 + + # := + [cache error][, TLB error][, bus error][, micro-architectural error] + + * := unknown or generic | data read | data write | \ + instruction execution + + # := + [restartable][, precise IP][, overflow][, corrected] + + := + [error_status: ] + [physical_address: ] + [physical_address_mask: ] + [node: ] + [card: ] + [module: ] + [bank: ] + [device: ] + [row: ] + [column: ] + [bit_position: ] + [requestor_id: ] + [responder_id: ] + [target_id: ] + [error_type: , ] + + * := + unknown | no error | single-bit ECC | multi-bit ECC | \ + single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \ + target abort | parity error | watchdog timeout | invalid address | \ + mirror Broken | memory sparing | scrub corrected error | \ + scrub uncorrected error + + := + [port_type: , ] + [version: .] + [command: , status: ] + [device_id: ::. + slot: + secondary_bus: + vendor_id: , device_id: + class_code: ] + [serial number: , ] + [bridge: secondary_status: , control: ] + [aer_status: , aer_mask: + + [aer_uncor_severity: ] + aer_layer=, aer_agent= + aer_tlp_header: ] + + * := PCIe end point | legacy PCI end point | \ + unknown | unknown | root port | upstream switch port | \ + downstream switch port | PCIe to PCI/PCI-X bridge | \ + PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \ + root complex event collector + + if section severity is fatal or recoverable + # := + unknown | unknown | unknown | unknown | Data Link Protocol | \ + unknown | unknown | unknown | unknown | unknown | unknown | unknown | \ + Poisoned TLP | Flow Control Protocol | Completion Timeout | \ + Completer Abort | Unexpected Completion | Receiver Overflow | \ + Malformed TLP | ECRC | Unsupported Request + else + # := + Receiver Error | unknown | unknown | unknown | unknown | unknown | \ + Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \ + Replay Timer Timeout | Advisory Non-Fatal + fi + + := + Physical Layer | Data Link Layer | Transaction Layer + + := + Receiver ID | Requester ID | Completer ID | Transmitter ID + +Where, [] designate corresponding content is optional + +All description with * has the following format:: + + field: , + +Where value of should be the position of "string" in description. Otherwise, will be "unknown". + +All description with # has the following format:: + + field: + + +Where each string in corresponding to one set bit of +. The bit position is the position of "string" in description. + +For more detailed explanation of every field, please refer to UEFI +specification version 2.3 or later, section Appendix N: Common +Platform Error Record. 
diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst index e9f253d54897c..869badba6d7a0 100644 --- a/Documentation/firmware-guide/acpi/index.rst +++ b/Documentation/firmware-guide/acpi/index.rst @@ -17,6 +17,7 @@ ACPI Support DSD-properties-rules debug aml-debugger + apei/output_format gpio-properties i2c-muxes acpi-lid -- GitLab From 440ebec745dcbc44866f6b19e5145a12d4494a5f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 25 Apr 2019 01:53:02 +0800 Subject: [PATCH 1554/1861] Documentation: ACPI: move apei/einj.txt to firmware-guide/acpi and convert to reST This converts the plain text documentation to reStructuredText format and adds it to Sphinx TOC tree. No essential content change. Signed-off-by: Changbin Du Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- .../acpi/apei/einj.rst} | 94 ++++++++++--------- Documentation/firmware-guide/acpi/index.rst | 1 + 2 files changed, 52 insertions(+), 43 deletions(-) rename Documentation/{acpi/apei/einj.txt => firmware-guide/acpi/apei/einj.rst} (67%) diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/firmware-guide/acpi/apei/einj.rst similarity index 67% rename from Documentation/acpi/apei/einj.txt rename to Documentation/firmware-guide/acpi/apei/einj.rst index e550c8b981399..e588bccf51583 100644 --- a/Documentation/acpi/apei/einj.txt +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -1,13 +1,16 @@ - APEI Error INJection - ~~~~~~~~~~~~~~~~~~~~ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +APEI Error INJection +==================== EINJ provides a hardware error injection mechanism. It is very useful for debugging and testing APEI and RAS features in general. You need to check whether your BIOS supports EINJ first. For that, look -for early boot messages similar to this one: +for early boot messages similar to this one:: -ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL 00000001 INTL 00000001) + ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL 00000001 INTL 00000001) which shows that the BIOS is exposing an EINJ table - it is the mechanism through which the injection is done. @@ -23,11 +26,11 @@ order to see the APEI,EINJ,... functionality supported and exposed by the BIOS menu. To use EINJ, make sure the following are options enabled in your kernel -configuration: +configuration:: -CONFIG_DEBUG_FS -CONFIG_ACPI_APEI -CONFIG_ACPI_APEI_EINJ + CONFIG_DEBUG_FS + CONFIG_ACPI_APEI + CONFIG_ACPI_APEI_EINJ The EINJ user interface is in /apei/einj. 
@@ -37,20 +40,22 @@ The following files belong to it: This file shows which error types are supported: + ================ =================================== Error Type Value Error Description - ================ ================= - 0x00000001 Processor Correctable - 0x00000002 Processor Uncorrectable non-fatal - 0x00000004 Processor Uncorrectable fatal - 0x00000008 Memory Correctable - 0x00000010 Memory Uncorrectable non-fatal - 0x00000020 Memory Uncorrectable fatal - 0x00000040 PCI Express Correctable - 0x00000080 PCI Express Uncorrectable fatal - 0x00000100 PCI Express Uncorrectable non-fatal - 0x00000200 Platform Correctable - 0x00000400 Platform Uncorrectable non-fatal - 0x00000800 Platform Uncorrectable fatal + ================ =================================== + 0x00000001 Processor Correctable + 0x00000002 Processor Uncorrectable non-fatal + 0x00000004 Processor Uncorrectable fatal + 0x00000008 Memory Correctable + 0x00000010 Memory Uncorrectable non-fatal + 0x00000020 Memory Uncorrectable fatal + 0x00000040 PCI Express Correctable + 0x00000080 PCI Express Uncorrectable fatal + 0x00000100 PCI Express Uncorrectable non-fatal + 0x00000200 Platform Correctable + 0x00000400 Platform Uncorrectable non-fatal + 0x00000800 Platform Uncorrectable fatal + ================ =================================== The format of the file contents are as above, except present are only the available error types. @@ -73,9 +78,12 @@ The following files belong to it: injection. Value is a bitmask as specified in ACPI5.0 spec for the SET_ERROR_TYPE_WITH_ADDRESS data structure: - Bit 0 - Processor APIC field valid (see param3 below). - Bit 1 - Memory address and mask valid (param1 and param2). - Bit 2 - PCIe (seg,bus,dev,fn) valid (see param4 below). + Bit 0 + Processor APIC field valid (see param3 below). + Bit 1 + Memory address and mask valid (param1 and param2). + Bit 2 + PCIe (seg,bus,dev,fn) valid (see param4 below). If set to zero, legacy behavior is mimicked where the type of injection specifies just one bit set, and param1 is multiplexed. @@ -121,7 +129,7 @@ BIOS versions based on the ACPI 5.0 specification have more control over the target of the injection. For processor-related errors (type 0x1, 0x2 and 0x4), you can set flags to 0x3 (param3 for bit 0, and param1 and param2 for bit 1) so that you have more information added to the error -signature being injected. The actual data passed is this: +signature being injected. The actual data passed is this:: memory_address = param1; memory_address_range = param2; @@ -131,7 +139,7 @@ signature being injected. The actual data passed is this: For memory errors (type 0x8, 0x10 and 0x20) the address is set using param1 with a mask in param2 (0x0 is equivalent to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the segment, bus, device and -function are specified using param1: +function are specified using param1:: 31 24 23 16 15 11 10 8 7 0 +-------------------------------------------------+ @@ -152,26 +160,26 @@ documentation for details (and expect changes to this API if vendors creativity in using this feature expands beyond our expectations). 
-An error injection example: +An error injection example:: -# cd /sys/kernel/debug/apei/einj -# cat available_error_type # See which errors can be injected -0x00000002 Processor Uncorrectable non-fatal -0x00000008 Memory Correctable -0x00000010 Memory Uncorrectable non-fatal -# echo 0x12345000 > param1 # Set memory address for injection -# echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page -# echo 0x8 > error_type # Choose correctable memory error -# echo 1 > error_inject # Inject now + # cd /sys/kernel/debug/apei/einj + # cat available_error_type # See which errors can be injected + 0x00000002 Processor Uncorrectable non-fatal + 0x00000008 Memory Correctable + 0x00000010 Memory Uncorrectable non-fatal + # echo 0x12345000 > param1 # Set memory address for injection + # echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page + # echo 0x8 > error_type # Choose correctable memory error + # echo 1 > error_inject # Inject now -You should see something like this in dmesg: +You should see something like this in dmesg:: -[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR -[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090 -[22715.834759] EDAC sbridge MC3: TSC 0 -[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86 -[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0 -[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0) + [22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR + [22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090 + [22715.834759] EDAC sbridge MC3: TSC 0 + [22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86 + [22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0 + [22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0) For more information about EINJ, please refer to ACPI specification version 4.0, section 17.5 and ACPI 5.0, section 18.6. diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst index 869badba6d7a0..fca854f017d8a 100644 --- a/Documentation/firmware-guide/acpi/index.rst +++ b/Documentation/firmware-guide/acpi/index.rst @@ -18,6 +18,7 @@ ACPI Support debug aml-debugger apei/output_format + apei/einj gpio-properties i2c-muxes acpi-lid -- GitLab From 3e57460f007c86476c376a08e31da441cae0b195 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 25 Apr 2019 01:53:03 +0800 Subject: [PATCH 1555/1861] Documentation: ACPI: move cppc_sysfs.txt to admin-guide/acpi and convert to reST This converts the plain text documentation to reStructuredText format and adds it to Sphinx TOC tree. No essential content change. Signed-off-by: Changbin Du Signed-off-by: Rafael J. 
Wysocki --- .../acpi/cppc_sysfs.rst} | 71 ++++++++++--------- Documentation/admin-guide/acpi/index.rst | 1 + 2 files changed, 40 insertions(+), 32 deletions(-) rename Documentation/{acpi/cppc_sysfs.txt => admin-guide/acpi/cppc_sysfs.rst} (51%) diff --git a/Documentation/acpi/cppc_sysfs.txt b/Documentation/admin-guide/acpi/cppc_sysfs.rst similarity index 51% rename from Documentation/acpi/cppc_sysfs.txt rename to Documentation/admin-guide/acpi/cppc_sysfs.rst index f20fb445135d3..a4b99afbe331b 100644 --- a/Documentation/acpi/cppc_sysfs.txt +++ b/Documentation/admin-guide/acpi/cppc_sysfs.rst @@ -1,5 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 - Collaborative Processor Performance Control (CPPC) +================================================== +Collaborative Processor Performance Control (CPPC) +================================================== + +CPPC +==== CPPC defined in the ACPI spec describes a mechanism for the OS to manage the performance of a logical processor on a contigious and abstract performance @@ -10,31 +16,28 @@ For more details on CPPC please refer to the ACPI specification at: http://uefi.org/specifications -Some of the CPPC registers are exposed via sysfs under: - -/sys/devices/system/cpu/cpuX/acpi_cppc/ - -for each cpu X +Some of the CPPC registers are exposed via sysfs under:: --------------------------------------------------------------------------------- + /sys/devices/system/cpu/cpuX/acpi_cppc/ -$ ls -lR /sys/devices/system/cpu/cpu0/acpi_cppc/ -/sys/devices/system/cpu/cpu0/acpi_cppc/: -total 0 --r--r--r-- 1 root root 65536 Mar 5 19:38 feedback_ctrs --r--r--r-- 1 root root 65536 Mar 5 19:38 highest_perf --r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_freq --r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_nonlinear_perf --r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_perf --r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_freq --r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_perf --r--r--r-- 1 root root 65536 Mar 5 19:38 reference_perf --r--r--r-- 1 root root 65536 Mar 5 19:38 wraparound_time +for each cpu X:: --------------------------------------------------------------------------------- + $ ls -lR /sys/devices/system/cpu/cpu0/acpi_cppc/ + /sys/devices/system/cpu/cpu0/acpi_cppc/: + total 0 + -r--r--r-- 1 root root 65536 Mar 5 19:38 feedback_ctrs + -r--r--r-- 1 root root 65536 Mar 5 19:38 highest_perf + -r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_freq + -r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_nonlinear_perf + -r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_perf + -r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_freq + -r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_perf + -r--r--r-- 1 root root 65536 Mar 5 19:38 reference_perf + -r--r--r-- 1 root root 65536 Mar 5 19:38 wraparound_time * highest_perf : Highest performance of this processor (abstract scale). -* nominal_perf : Highest sustained performance of this processor (abstract scale). +* nominal_perf : Highest sustained performance of this processor + (abstract scale). * lowest_nonlinear_perf : Lowest performance of this processor with nonlinear power savings (abstract scale). * lowest_perf : Lowest performance of this processor (abstract scale). @@ -48,22 +51,26 @@ total 0 * feedback_ctrs : Includes both Reference and delivered performance counter. Reference counter ticks up proportional to processor's reference performance. Delivered counter ticks up proportional to processor's delivered performance. -* wraparound_time: Minimum time for the feedback counters to wraparound (seconds). 
+* wraparound_time: Minimum time for the feedback counters to wraparound + (seconds). * reference_perf : Performance level at which reference performance counter accumulates (abstract scale). --------------------------------------------------------------------------------- - Computing Average Delivered Performance +Computing Average Delivered Performance +======================================= + +Below describes the steps to compute the average performance delivered by +taking two different snapshots of feedback counters at time T1 and T2. + + T1: Read feedback_ctrs as fbc_t1 + Wait or run some workload -Below describes the steps to compute the average performance delivered by taking -two different snapshots of feedback counters at time T1 and T2. + T2: Read feedback_ctrs as fbc_t2 -T1: Read feedback_ctrs as fbc_t1 - Wait or run some workload -T2: Read feedback_ctrs as fbc_t2 +:: -delivered_counter_delta = fbc_t2[del] - fbc_t1[del] -reference_counter_delta = fbc_t2[ref] - fbc_t1[ref] + delivered_counter_delta = fbc_t2[del] - fbc_t1[del] + reference_counter_delta = fbc_t2[ref] - fbc_t1[ref] -delivered_perf = (refernce_perf x delivered_counter_delta) / reference_counter_delta + delivered_perf = (refernce_perf x delivered_counter_delta) / reference_counter_delta diff --git a/Documentation/admin-guide/acpi/index.rst b/Documentation/admin-guide/acpi/index.rst index d68e9914c5ff6..9049a7b9f0654 100644 --- a/Documentation/admin-guide/acpi/index.rst +++ b/Documentation/admin-guide/acpi/index.rst @@ -10,3 +10,4 @@ the Linux ACPI support. initrd_table_override dsdt-override + cppc_sysfs -- GitLab From 4887954cac779abe38d3ba6446dec341ccca04ea Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 25 Apr 2019 01:53:04 +0800 Subject: [PATCH 1556/1861] Documentation: ACPI: move lpit.txt to firmware-guide/acpi and convert to reST This converts the plain text documentation to reStructuredText format and adds it to Sphinx TOC tree. No essential content change. Signed-off-by: Changbin Du Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/index.rst | 1 + .../lpit.txt => firmware-guide/acpi/lpit.rst} | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) rename Documentation/{acpi/lpit.txt => firmware-guide/acpi/lpit.rst} (68%) diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst index fca854f017d8a..0e60f4b7129a2 100644 --- a/Documentation/firmware-guide/acpi/index.rst +++ b/Documentation/firmware-guide/acpi/index.rst @@ -22,3 +22,4 @@ ACPI Support gpio-properties i2c-muxes acpi-lid + lpit diff --git a/Documentation/acpi/lpit.txt b/Documentation/firmware-guide/acpi/lpit.rst similarity index 68% rename from Documentation/acpi/lpit.txt rename to Documentation/firmware-guide/acpi/lpit.rst index b426398d2e971..aca928fab027a 100644 --- a/Documentation/acpi/lpit.txt +++ b/Documentation/firmware-guide/acpi/lpit.rst @@ -1,3 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Low Power Idle Table (LPIT) +=========================== + To enumerate platform Low Power Idle states, Intel platforms are using “Low Power Idle Table” (LPIT). 
More details about this table can be downloaded from: @@ -8,13 +14,15 @@ Residencies for each low power state can be read via FFH On platforms supporting S0ix sleep states, there can be two types of residencies: -- CPU PKG C10 (Read via FFH interface) -- Platform Controller Hub (PCH) SLP_S0 (Read via memory mapped interface) + + - CPU PKG C10 (Read via FFH interface) + - Platform Controller Hub (PCH) SLP_S0 (Read via memory mapped interface) The following attributes are added dynamically to the cpuidle -sysfs attribute group: - /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us - /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us +sysfs attribute group:: + + /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us + /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us The "low_power_idle_cpu_residency_us" attribute shows time spent by the CPU package in PKG C10 -- GitLab From 7fe19072df5555425268f9452059d3c514c6780f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 25 Apr 2019 01:53:05 +0800 Subject: [PATCH 1557/1861] Documentation: ACPI: move ssdt-overlays.txt to admin-guide/acpi and convert to reST This converts the plain text documentation to reStructuredText format and adds it to Sphinx TOC tree. No essential content change. Signed-off-by: Changbin Du Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/ssdt-overlays.txt | 172 ----------------- Documentation/admin-guide/acpi/index.rst | 1 + .../admin-guide/acpi/ssdt-overlays.rst | 180 ++++++++++++++++++ 3 files changed, 181 insertions(+), 172 deletions(-) delete mode 100644 Documentation/acpi/ssdt-overlays.txt create mode 100644 Documentation/admin-guide/acpi/ssdt-overlays.rst diff --git a/Documentation/acpi/ssdt-overlays.txt b/Documentation/acpi/ssdt-overlays.txt deleted file mode 100644 index 5ae13f161ea2b..0000000000000 --- a/Documentation/acpi/ssdt-overlays.txt +++ /dev/null @@ -1,172 +0,0 @@ - -In order to support ACPI open-ended hardware configurations (e.g. development -boards) we need a way to augment the ACPI configuration provided by the firmware -image. A common example is connecting sensors on I2C / SPI buses on development -boards. - -Although this can be accomplished by creating a kernel platform driver or -recompiling the firmware image with updated ACPI tables, neither is practical: -the former proliferates board specific kernel code while the latter requires -access to firmware tools which are often not publicly available. - -Because ACPI supports external references in AML code a more practical -way to augment firmware ACPI configuration is by dynamically loading -user defined SSDT tables that contain the board specific information. 
- -For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the -Minnowboard MAX development board exposed via the LSE connector [1], the -following ASL code can be used: - -DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003) -{ - External (\_SB.I2C6, DeviceObj) - - Scope (\_SB.I2C6) - { - Device (STAC) - { - Name (_ADR, Zero) - Name (_HID, "BMA222E") - - Method (_CRS, 0, Serialized) - { - Name (RBUF, ResourceTemplate () - { - I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80, - AddressingMode7Bit, "\\_SB.I2C6", 0x00, - ResourceConsumer, ,) - GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000, - "\\_SB.GPO2", 0x00, ResourceConsumer, , ) - { // Pin list - 0 - } - }) - Return (RBUF) - } - } - } -} - -which can then be compiled to AML binary format: - -$ iasl minnowmax.asl - -Intel ACPI Component Architecture -ASL Optimizing Compiler version 20140214-64 [Mar 29 2014] -Copyright (c) 2000 - 2014 Intel Corporation - -ASL Input: minnomax.asl - 30 lines, 614 bytes, 7 keywords -AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes - -[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29 - -The resulting AML code can then be loaded by the kernel using one of the methods -below. - -== Loading ACPI SSDTs from initrd == - -This option allows loading of user defined SSDTs from initrd and it is useful -when the system does not support EFI or when there is not enough EFI storage. - -It works in a similar way with initrd based ACPI tables override/upgrade: SSDT -aml code must be placed in the first, uncompressed, initrd under the -"kernel/firmware/acpi" path. Multiple files can be used and this will translate -in loading multiple tables. Only SSDT and OEM tables are allowed. See -initrd_table_override.txt for more details. - -Here is an example: - -# Add the raw ACPI tables to an uncompressed cpio archive. -# They must be put into a /kernel/firmware/acpi directory inside the -# cpio archive. -# The uncompressed cpio archive must be the first. -# Other, typically compressed cpio archives, must be -# concatenated on top of the uncompressed one. -mkdir -p kernel/firmware/acpi -cp ssdt.aml kernel/firmware/acpi - -# Create the uncompressed cpio archive and concatenate the original initrd -# on top: -find kernel | cpio -H newc --create > /boot/instrumented_initrd -cat /boot/initrd >>/boot/instrumented_initrd - -== Loading ACPI SSDTs from EFI variables == - -This is the preferred method, when EFI is supported on the platform, because it -allows a persistent, OS independent way of storing the user defined SSDTs. There -is also work underway to implement EFI support for loading user defined SSDTs -and using this method will make it easier to convert to the EFI loading -mechanism when that will arrive. - -In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line -parameter can be used. The argument for the option is the variable name to -use. If there are multiple variables with the same name but with different -vendor GUIDs, all of them will be loaded. - -In order to store the AML code in an EFI variable the efivarfs filesystem can be -used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all -recent distribution. - -Creating a new file in /sys/firmware/efi/efivars will automatically create a new -EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI -variable. 
Please note that the file name needs to be specially formatted as -"Name-GUID" and that the first 4 bytes in the file (little-endian format) -represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in -include/linux/efi.h). Writing to the file must also be done with one write -operation. - -For example, you can use the following bash script to create/update an EFI -variable with the content from a given file: - -#!/bin/sh -e - -while ! [ -z "$1" ]; do - case "$1" in - "-f") filename="$2"; shift;; - "-g") guid="$2"; shift;; - *) name="$1";; - esac - shift -done - -usage() -{ - echo "Syntax: ${0##*/} -f filename [ -g guid ] name" - exit 1 -} - -[ -n "$name" -a -f "$filename" ] || usage - -EFIVARFS="/sys/firmware/efi/efivars" - -[ -d "$EFIVARFS" ] || exit 2 - -if stat -tf $EFIVARFS | grep -q -v de5e81e4; then - mount -t efivarfs none $EFIVARFS -fi - -# try to pick up an existing GUID -[ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-) - -# use a randomly generated GUID -[ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)" - -# efivarfs expects all of the data in one write -tmp=$(mktemp) -/bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp -dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp) -rm $tmp - -== Loading ACPI SSDTs from configfs == - -This option allows loading of user defined SSDTs from userspace via the configfs -interface. The CONFIG_ACPI_CONFIGFS option must be select and configfs must be -mounted. In the following examples, we assume that configfs has been mounted in -/config. - -New tables can be loading by creating new directories in /config/acpi/table/ and -writing the SSDT aml code in the aml attribute: - -cd /config/acpi/table -mkdir my_ssdt -cat ~/ssdt.aml > my_ssdt/aml diff --git a/Documentation/admin-guide/acpi/index.rst b/Documentation/admin-guide/acpi/index.rst index 9049a7b9f0654..4d13eeea1ecac 100644 --- a/Documentation/admin-guide/acpi/index.rst +++ b/Documentation/admin-guide/acpi/index.rst @@ -10,4 +10,5 @@ the Linux ACPI support. initrd_table_override dsdt-override + ssdt-overlays cppc_sysfs diff --git a/Documentation/admin-guide/acpi/ssdt-overlays.rst b/Documentation/admin-guide/acpi/ssdt-overlays.rst new file mode 100644 index 0000000000000..da37455f96c9b --- /dev/null +++ b/Documentation/admin-guide/acpi/ssdt-overlays.rst @@ -0,0 +1,180 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +SSDT Overlays +============= + +In order to support ACPI open-ended hardware configurations (e.g. development +boards) we need a way to augment the ACPI configuration provided by the firmware +image. A common example is connecting sensors on I2C / SPI buses on development +boards. + +Although this can be accomplished by creating a kernel platform driver or +recompiling the firmware image with updated ACPI tables, neither is practical: +the former proliferates board specific kernel code while the latter requires +access to firmware tools which are often not publicly available. + +Because ACPI supports external references in AML code a more practical +way to augment firmware ACPI configuration is by dynamically loading +user defined SSDT tables that contain the board specific information. 
+ +For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the +Minnowboard MAX development board exposed via the LSE connector [1], the +following ASL code can be used:: + + DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003) + { + External (\_SB.I2C6, DeviceObj) + + Scope (\_SB.I2C6) + { + Device (STAC) + { + Name (_ADR, Zero) + Name (_HID, "BMA222E") + + Method (_CRS, 0, Serialized) + { + Name (RBUF, ResourceTemplate () + { + I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80, + AddressingMode7Bit, "\\_SB.I2C6", 0x00, + ResourceConsumer, ,) + GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000, + "\\_SB.GPO2", 0x00, ResourceConsumer, , ) + { // Pin list + 0 + } + }) + Return (RBUF) + } + } + } + } + +which can then be compiled to AML binary format:: + + $ iasl minnowmax.asl + + Intel ACPI Component Architecture + ASL Optimizing Compiler version 20140214-64 [Mar 29 2014] + Copyright (c) 2000 - 2014 Intel Corporation + + ASL Input: minnomax.asl - 30 lines, 614 bytes, 7 keywords + AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes + +[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29 + +The resulting AML code can then be loaded by the kernel using one of the methods +below. + +Loading ACPI SSDTs from initrd +============================== + +This option allows loading of user defined SSDTs from initrd and it is useful +when the system does not support EFI or when there is not enough EFI storage. + +It works in a similar way with initrd based ACPI tables override/upgrade: SSDT +aml code must be placed in the first, uncompressed, initrd under the +"kernel/firmware/acpi" path. Multiple files can be used and this will translate +in loading multiple tables. Only SSDT and OEM tables are allowed. See +initrd_table_override.txt for more details. + +Here is an example:: + + # Add the raw ACPI tables to an uncompressed cpio archive. + # They must be put into a /kernel/firmware/acpi directory inside the + # cpio archive. + # The uncompressed cpio archive must be the first. + # Other, typically compressed cpio archives, must be + # concatenated on top of the uncompressed one. + mkdir -p kernel/firmware/acpi + cp ssdt.aml kernel/firmware/acpi + + # Create the uncompressed cpio archive and concatenate the original initrd + # on top: + find kernel | cpio -H newc --create > /boot/instrumented_initrd + cat /boot/initrd >>/boot/instrumented_initrd + +Loading ACPI SSDTs from EFI variables +===================================== + +This is the preferred method, when EFI is supported on the platform, because it +allows a persistent, OS independent way of storing the user defined SSDTs. There +is also work underway to implement EFI support for loading user defined SSDTs +and using this method will make it easier to convert to the EFI loading +mechanism when that will arrive. + +In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line +parameter can be used. The argument for the option is the variable name to +use. If there are multiple variables with the same name but with different +vendor GUIDs, all of them will be loaded. + +In order to store the AML code in an EFI variable the efivarfs filesystem can be +used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all +recent distribution. + +Creating a new file in /sys/firmware/efi/efivars will automatically create a new +EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI +variable. 
Please note that the file name needs to be specially formatted as +"Name-GUID" and that the first 4 bytes in the file (little-endian format) +represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in +include/linux/efi.h). Writing to the file must also be done with one write +operation. + +For example, you can use the following bash script to create/update an EFI +variable with the content from a given file:: + + #!/bin/sh -e + + while ! [ -z "$1" ]; do + case "$1" in + "-f") filename="$2"; shift;; + "-g") guid="$2"; shift;; + *) name="$1";; + esac + shift + done + + usage() + { + echo "Syntax: ${0##*/} -f filename [ -g guid ] name" + exit 1 + } + + [ -n "$name" -a -f "$filename" ] || usage + + EFIVARFS="/sys/firmware/efi/efivars" + + [ -d "$EFIVARFS" ] || exit 2 + + if stat -tf $EFIVARFS | grep -q -v de5e81e4; then + mount -t efivarfs none $EFIVARFS + fi + + # try to pick up an existing GUID + [ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-) + + # use a randomly generated GUID + [ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)" + + # efivarfs expects all of the data in one write + tmp=$(mktemp) + /bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp + dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp) + rm $tmp + +Loading ACPI SSDTs from configfs +================================ + +This option allows loading of user defined SSDTs from userspace via the configfs +interface. The CONFIG_ACPI_CONFIGFS option must be select and configfs must be +mounted. In the following examples, we assume that configfs has been mounted in +/config. + +New tables can be loading by creating new directories in /config/acpi/table/ and +writing the SSDT aml code in the aml attribute:: + + cd /config/acpi/table + mkdir my_ssdt + cat ~/ssdt.aml > my_ssdt/aml -- GitLab From 7fb091f806c55da593b93bc858d81203a9597257 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 25 Apr 2019 01:53:06 +0800 Subject: [PATCH 1558/1861] Documentation: ACPI: move video_extension.txt to firmware-guide/acpi and convert to reST This converts the plain text documentation to reStructuredText format and adds it to Sphinx TOC tree. No essential content change. Signed-off-by: Changbin Du Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/index.rst | 1 + .../acpi/video_extension.rst} | 83 +++++++++++-------- 2 files changed, 50 insertions(+), 34 deletions(-) rename Documentation/{acpi/video_extension.txt => firmware-guide/acpi/video_extension.rst} (70%) diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst index 0e60f4b7129a2..ae609eec4679c 100644 --- a/Documentation/firmware-guide/acpi/index.rst +++ b/Documentation/firmware-guide/acpi/index.rst @@ -23,3 +23,4 @@ ACPI Support i2c-muxes acpi-lid lpit + video_extension diff --git a/Documentation/acpi/video_extension.txt b/Documentation/firmware-guide/acpi/video_extension.rst similarity index 70% rename from Documentation/acpi/video_extension.txt rename to Documentation/firmware-guide/acpi/video_extension.rst index 79bf6a4921bee..099b8607e07b2 100644 --- a/Documentation/acpi/video_extension.txt +++ b/Documentation/firmware-guide/acpi/video_extension.rst @@ -1,5 +1,8 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +===================== ACPI video extensions -~~~~~~~~~~~~~~~~~~~~~ +===================== This driver implement the ACPI Extensions For Display Adapters for integrated graphics devices on motherboard, as specified in ACPI 2.0 @@ -8,9 +11,10 @@ defining the video POST device, retrieving EDID information or to setup a video output, etc. Note that this is an ref. implementation only. It may or may not work for your integrated video device. -The ACPI video driver does 3 things regarding backlight control: +The ACPI video driver does 3 things regarding backlight control. -1 Export a sysfs interface for user space to control backlight level +Export a sysfs interface for user space to control backlight level +================================================================== If the ACPI table has a video device, and acpi_backlight=vendor kernel command line is not present, the driver will register a backlight device @@ -22,36 +26,41 @@ The backlight sysfs interface has a standard definition here: Documentation/ABI/stable/sysfs-class-backlight. And what ACPI video driver does is: -actual_brightness: on read, control method _BQC will be evaluated to -get the brightness level the firmware thinks it is at; -bl_power: not implemented, will set the current brightness instead; -brightness: on write, control method _BCM will run to set the requested -brightness level; -max_brightness: Derived from the _BCL package(see below); -type: firmware + +actual_brightness: + on read, control method _BQC will be evaluated to + get the brightness level the firmware thinks it is at; +bl_power: + not implemented, will set the current brightness instead; +brightness: + on write, control method _BCM will run to set the requested brightness level; +max_brightness: + Derived from the _BCL package(see below); +type: + firmware Note that ACPI video backlight driver will always use index for brightness, actual_brightness and max_brightness. So if we have -the following _BCL package: +the following _BCL package:: -Method (_BCL, 0, NotSerialized) -{ - Return (Package (0x0C) + Method (_BCL, 0, NotSerialized) { - 0x64, - 0x32, - 0x0A, - 0x14, - 0x1E, - 0x28, - 0x32, - 0x3C, - 0x46, - 0x50, - 0x5A, - 0x64 - }) -} + Return (Package (0x0C) + { + 0x64, + 0x32, + 0x0A, + 0x14, + 0x1E, + 0x28, + 0x32, + 0x3C, + 0x46, + 0x50, + 0x5A, + 0x64 + }) + } The first two levels are for when laptop are on AC or on battery and are not used by Linux currently. The remaining 10 levels are supported levels @@ -62,13 +71,15 @@ as a "brightness level" indicator. Thus from the user space perspective the range of available brightness levels is from 0 to 9 (max_brightness) inclusive. -2 Notify user space about hotkey event +Notify user space about hotkey event +==================================== There are generally two cases for hotkey event reporting: + i) For some laptops, when user presses the hotkey, a scancode will be generated and sent to user space through the input device created by the keyboard driver as a key type input event, with proper remap, the - following key code will appear to user space: + following key code will appear to user space:: EV_KEY, KEY_BRIGHTNESSUP EV_KEY, KEY_BRIGHTNESSDOWN @@ -84,23 +95,27 @@ ii) For some laptops, the press of the hotkey will not generate the notify value it received and send the event to user space through the input device it created: + ===== ================== event keycode + ===== ================== 0x86 KEY_BRIGHTNESSUP 0x87 KEY_BRIGHTNESSDOWN etc. 
+ ===== ================== so this would lead to the same effect as case i) now. Once user space tool receives this event, it can modify the backlight level through the sysfs interface. -3 Change backlight level in the kernel +Change backlight level in the kernel +==================================== This works for machines covered by case ii) in Section 2. Once the driver received a notification, it will set the backlight level accordingly. This does not affect the sending of event to user space, they are always sent to user space regardless of whether or not the video module controls the backlight level directly. This behaviour can be controlled through the brightness_switch_enabled -module parameter as documented in admin-guide/kernel-parameters.rst. It is recommended to -disable this behaviour once a GUI environment starts up and wants to have full -control of the backlight level. +module parameter as documented in admin-guide/kernel-parameters.rst. It is +recommended to disable this behaviour once a GUI environment starts up and +wants to have full control of the backlight level. -- GitLab From 51dcf7482f4b3166ef477d44181648110d501d25 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:10 +0200 Subject: [PATCH 1559/1861] PM / Domains: Use the base device for driver_deferred_probe_check_state() When genpd fails to attach a device to one of its multiple PM domains, we end up calling driver_deferred_probe_check_state() for the recently allocated virtual device. This is incorrect, as it's the base device that is being probed. Fix this by passing along the base device to __genpd_dev_pm_attach() and use that instead. Fixes: e01afc325025 ("PM / Domains: Stop deferring probe at the end of initcall") Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 8362dfe187f51..8aca1c9b4406b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2405,8 +2405,8 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } -static int __genpd_dev_pm_attach(struct device *dev, unsigned int index, - bool power_on) +static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, + unsigned int index, bool power_on) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; @@ -2424,7 +2424,7 @@ static int __genpd_dev_pm_attach(struct device *dev, unsigned int index, mutex_unlock(&gpd_list_lock); dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); - return driver_deferred_probe_check_state(dev); + return driver_deferred_probe_check_state(base_dev); } dev_dbg(dev, "adding to PM domain %s\n", pd->name); @@ -2480,7 +2480,7 @@ int genpd_dev_pm_attach(struct device *dev) "#power-domain-cells") != 1) return 0; - return __genpd_dev_pm_attach(dev, 0, true); + return __genpd_dev_pm_attach(dev, dev, 0, true); } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -2533,7 +2533,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, } /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(virt_dev, index, false); + ret = __genpd_dev_pm_attach(virt_dev, dev, index, false); if (ret < 1) { device_unregister(virt_dev); return ret ? 
ERR_PTR(ret) : NULL; -- GitLab From a174920d69db9be80332f535db4a8ca2e3bccf24 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:11 +0200 Subject: [PATCH 1560/1861] PM / Domains: Drop unused in-parameter to some genpd functions Both genpd_alloc_dev_data() and genpd_add_device(), that are internal genpd functions, allow a struct gpd_timing_data *td to be passed as an in-parameter. However, as NULL is always passed, let's just drop the in-parameter altogether. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 8aca1c9b4406b..93298b7db4080 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1396,8 +1396,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #endif /* CONFIG_PM_SLEEP */ -static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, - struct gpd_timing_data *td) +static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) { struct generic_pm_domain_data *gpd_data; int ret; @@ -1412,9 +1411,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, goto err_put; } - if (td) - gpd_data->td = *td; - gpd_data->base.dev = dev; gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; @@ -1504,8 +1500,7 @@ static void genpd_clear_cpumask(struct generic_pm_domain *genpd, genpd_update_cpumask(genpd, dev, false); } -static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, - struct gpd_timing_data *td) +static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { struct generic_pm_domain_data *gpd_data; int ret; @@ -1515,7 +1510,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - gpd_data = genpd_alloc_dev_data(dev, td); + gpd_data = genpd_alloc_dev_data(dev); if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); @@ -1553,7 +1548,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev, NULL); + ret = genpd_add_device(genpd, dev); mutex_unlock(&gpd_list_lock); return ret; @@ -2259,7 +2254,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) goto out; } - ret = genpd_add_device(genpd, dev, NULL); + ret = genpd_add_device(genpd, dev); out: mutex_unlock(&gpd_list_lock); @@ -2429,7 +2424,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev_dbg(dev, "adding to PM domain %s\n", pd->name); - ret = genpd_add_device(pd, dev, NULL); + ret = genpd_add_device(pd, dev); mutex_unlock(&gpd_list_lock); if (ret < 0) { -- GitLab From b24e196586fecafed1c3cff9b2f87c1a64138ade Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:12 +0200 Subject: [PATCH 1561/1861] PM / Domains: Search for the CPU device outside the genpd lock While attaching/detaching a device to a PM domain (genpd) with GENPD_FLAG_CPU_DOMAIN set, genpd iterates the cpu_possible_mask to check whether or not the device corresponds to a CPU. This iteration is done while holding the genpd's lock, which is unnecessary. Avoid the locking by restructuring the corresponding code a bit. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 52 +++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 93298b7db4080..da1c991789433 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1450,8 +1450,8 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } -static void __genpd_update_cpumask(struct generic_pm_domain *genpd, - int cpu, bool set, unsigned int depth) +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) { struct gpd_link *link; @@ -1462,7 +1462,7 @@ static void __genpd_update_cpumask(struct generic_pm_domain *genpd, struct generic_pm_domain *master = link->master; genpd_lock_nested(master, depth + 1); - __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_update_cpumask(master, cpu, set, depth + 1); genpd_unlock(master); } @@ -1472,38 +1472,37 @@ static void __genpd_update_cpumask(struct generic_pm_domain *genpd, cpumask_clear_cpu(cpu, genpd->cpus); } -static void genpd_update_cpumask(struct generic_pm_domain *genpd, - struct device *dev, bool set) +static void genpd_set_cpumask(struct generic_pm_domain *genpd, int cpu) +{ + if (cpu >= 0) + genpd_update_cpumask(genpd, cpu, true, 0); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, int cpu) +{ + if (cpu >= 0) + genpd_update_cpumask(genpd, cpu, false, 0); +} + +static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) { int cpu; if (!genpd_is_cpu_domain(genpd)) - return; + return -1; for_each_possible_cpu(cpu) { - if (get_cpu_device(cpu) == dev) { - __genpd_update_cpumask(genpd, cpu, set, 0); - return; - } + if (get_cpu_device(cpu) == dev) + return cpu; } -} -static void genpd_set_cpumask(struct generic_pm_domain *genpd, - struct device *dev) -{ - genpd_update_cpumask(genpd, dev, true); -} - -static void genpd_clear_cpumask(struct generic_pm_domain *genpd, - struct device *dev) -{ - genpd_update_cpumask(genpd, dev, false); + return -1; } static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { struct generic_pm_domain_data *gpd_data; - int ret; + int ret, cpu; dev_dbg(dev, "%s()\n", __func__); @@ -1514,13 +1513,15 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); + cpu = genpd_get_cpu(genpd, dev); + ret = genpd->attach_dev ? 
genpd->attach_dev(genpd, dev) : 0; if (ret) goto out; genpd_lock(genpd); - genpd_set_cpumask(genpd, dev); + genpd_set_cpumask(genpd, cpu); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1560,13 +1561,14 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int ret = 0; + int cpu, ret = 0; dev_dbg(dev, "%s()\n", __func__); pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + cpu = genpd_get_cpu(genpd, dev); genpd_lock(genpd); @@ -1578,7 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - genpd_clear_cpumask(genpd, dev); + genpd_clear_cpumask(genpd, cpu); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); -- GitLab From f9ccd7c3a1d87cea3a6f9ed6c946dee9e7456b2e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:13 +0200 Subject: [PATCH 1562/1861] PM / Domains: Allow to attach a CPU via genpd_dev_pm_attach_by_id|name() Attaching a device via genpd_dev_pm_attach_by_id|name() makes genpd allocate a virtual device that it attaches instead. This leads to a problem in case when the base device belongs to a CPU. More precisely, it means genpd_get_cpu() compares against the virtual device, thus it fails to find a matching CPU device. Address this limitation by passing the base device to genpd_get_cpu() rather than the virtual device. Moreover, to deal with detach correctly from genpd_remove_device(), store the CPU number in struct generic_pm_domain_data, so as to be able to clear the corresponding bit in the cpumask for the genpd. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 20 ++++++++++---------- include/linux/pm_domain.h | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index da1c991789433..3d899e8abd585 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1499,10 +1499,11 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) return -1; } -static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct device *base_dev) { struct generic_pm_domain_data *gpd_data; - int ret, cpu; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -1513,7 +1514,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - cpu = genpd_get_cpu(genpd, dev); + gpd_data->cpu = genpd_get_cpu(genpd, base_dev); ret = genpd->attach_dev ? 
genpd->attach_dev(genpd, dev) : 0; if (ret) @@ -1521,7 +1522,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) genpd_lock(genpd); - genpd_set_cpumask(genpd, cpu); + genpd_set_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1549,7 +1550,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev); + ret = genpd_add_device(genpd, dev, dev); mutex_unlock(&gpd_list_lock); return ret; @@ -1561,14 +1562,13 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int cpu, ret = 0; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); - cpu = genpd_get_cpu(genpd, dev); genpd_lock(genpd); @@ -1580,7 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - genpd_clear_cpumask(genpd, cpu); + genpd_clear_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -2256,7 +2256,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) goto out; } - ret = genpd_add_device(genpd, dev); + ret = genpd_add_device(genpd, dev, dev); out: mutex_unlock(&gpd_list_lock); @@ -2426,7 +2426,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev_dbg(dev, "adding to PM domain %s\n", pd->name); - ret = genpd_add_device(pd, dev); + ret = genpd_add_device(pd, dev, base_dev); mutex_unlock(&gpd_list_lock); if (ret < 0) { diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index bc82e74560ee2..0e8e356bed6a4 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -175,6 +175,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + int cpu; unsigned int performance_state; void *data; }; -- GitLab From c208ac8f8f862dba7b01eb54557f4803b3c17296 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Apr 2019 16:11:37 +0200 Subject: [PATCH 1563/1861] x86: tsc: Rework time_cpufreq_notifier() There are problems with running time_cpufreq_notifier() on SMP systems. First off, the rdtsc() called from there runs on the CPU executing that code and not necessarily on the CPU whose sched_clock() rate is updated which is questionable at best. Second, in the cases when the frequencies of all CPUs in an SMP system are always in sync, it is not sufficient to update just one of them or the set associated with a given cpufreq policy on frequency changes - all CPUs in the system should be updated and that would require more than a simple transition notifier. Note, however, that the underlying issue (the TSC rate depending on the CPU frequency) has not been present in hardware shipping for the last few years and in quite a few relevant cases (acpi-cpufreq in particular) running time_cpufreq_notifier() will cause the TSC to be marked as unstable anyway. For this reason, make time_cpufreq_notifier() simply mark the TSC as unstable and give up when run on SMP and only try to carry out any adjustments otherwise. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Viresh Kumar --- arch/x86/kernel/tsc.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3fae238340699..cc6df5c6d7b3c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void) /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) + * speed as the bootup CPU. */ static void __init cyc2ns_init_secondary_cpus(void) { @@ -937,12 +936,12 @@ void tsc_restore_sched_clock_state(void) } #ifdef CONFIG_CPU_FREQ -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency +/* + * Frequency scaling support. Adjust the TSC based timer when the CPU frequency * changes. * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. + * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC + * as unstable and give up in those cases. * * Should fix up last_tsc too. Currently gettimeofday in the * first tick after the change will be slightly wrong. @@ -956,22 +955,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj; - lpj = &boot_cpu_data.loops_per_jiffy; -#ifdef CONFIG_SMP - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#endif + if (num_online_cpus() > 1) { + mark_tsc_unstable("cpufreq changes on SMP"); + return 0; + } if (!ref_freq) { ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; tsc_khz_ref = tsc_khz; } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + boot_cpu_data.loops_per_jiffy = + cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) -- GitLab From 9a8f612ca0d6a436e6471c9bed516d34a2cc626f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 8 Apr 2019 10:31:45 +0200 Subject: [PATCH 1564/1861] mtd: rawnand: marvell: Clean the controller state before each operation Since the migration of the driver to stop using the legacy ->select_chip() hook, there is nothing deselecting the target anymore, thus the selection is not forced at the next access. Ensure the ND_RUN bit and the interrupts are always in a clean state. 
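A minimal sketch of the ordering this fix establishes, with all names stubbed so it compiles stand-alone (none of these helpers are the marvell_nand.c API): the controller state is cleaned on every selection, and only the unchanged timing setup is skipped for an already-selected die.

  #include <stdio.h>

  struct ctrl { int selected_die; };

  static void clean_controller_state(void)
  {
          /* stands in for resetting NDCR, clearing ND_RUN and IRQ status */
          printf("NDCR reset, interrupts cleared\n");
  }

  static void select_die(struct ctrl *c, int die)
  {
          /* the fix: clean the state unconditionally, on every call... */
          clean_controller_state();

          /* ...and only then short-circuit the per-die timing setup */
          if (c->selected_die == die)
                  return;
          printf("programming timing registers for die %d\n", die);
          c->selected_die = die;
  }

  int main(void)
  {
          struct ctrl c = { .selected_die = -1 };

          select_die(&c, 0);
          select_die(&c, 0); /* state still cleaned, timings skipped */
          return 0;
  }

Before the patch the early return came first, so repeated operations on the same die ran with whatever stale ND_RUN and interrupt state the previous operation left behind.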
Cc: Daniel Mack Cc: stable@vger.kernel.org Fixes: b25251414f6e00 ("mtd: rawnand: marvell: Stop implementing ->select_chip()") Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Tested-by: Daniel Mack Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/nand/raw/marvell_nand.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index f38e5c1b87e47..d984538980e28 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -722,12 +722,6 @@ static void marvell_nfc_select_target(struct nand_chip *chip, struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); u32 ndcr_generic; - if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) - return; - - writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); - writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); - /* * Reset the NDCR register to a clean state for this particular chip, * also clear ND_RUN bit. @@ -739,6 +733,12 @@ static void marvell_nfc_select_target(struct nand_chip *chip, /* Also reset the interrupt status register */ marvell_nfc_clear_int(nfc, NDCR_ALL_INT); + if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) + return; + + writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); + writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); + nfc->selected_chip = chip; marvell_nand->selected_die = die_nr; } -- GitLab From 349ced9984ff540ce74ca8a0b2e9b03dc434b9dd Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 24 Apr 2019 00:16:10 -0700 Subject: [PATCH 1565/1861] power: supply: sysfs: prevent endless uevent loop with CONFIG_POWER_SUPPLY_DEBUG Fix a similar endless event loop as was done in commit 8dcf32175b4e ("i2c: prevent endless uevent loop with CONFIG_I2C_DEBUG_CORE"): The culprit is the dev_dbg printk in the i2c uevent handler. If this is activated (for instance by CONFIG_I2C_DEBUG_CORE) it results in an endless loop with systemd-journald. This happens if user-space scans the system log and reads the uevent file to get information about a newly created device, which seems fair use to me. Unfortunately reading the "uevent" file uses the same function that runs for creating the uevent for a new device, generating the next syslog entry Both CONFIG_I2C_DEBUG_CORE and CONFIG_POWER_SUPPLY_DEBUG were reported in https://bugs.freedesktop.org/show_bug.cgi?id=76886 but only former seems to have been fixed. Drop debug prints as it was done in I2C subsystem to resolve the issue. 
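The feedback loop described above can be modelled in a few lines of stand-alone C; everything here is a toy stand-in (no kernel, udev, or journald API is used), and the recursion is bounded only so the demo terminates, whereas the real bug has no bound.

  #include <stdio.h>

  static int log_lines;

  static void uevent_handler(int depth);

  static void log_consumer(int depth)
  {
          /* journald-like behaviour: react to each new log line by
           * reading the device's uevent file, which re-runs the handler */
          if (depth < 5) /* bounded here; unbounded in the real bug */
                  uevent_handler(depth + 1);
  }

  static void uevent_handler(int depth)
  {
          printf("dev_dbg: uevent (depth %d)\n", depth); /* the culprit */
          log_lines++;
          log_consumer(depth);
  }

  int main(void)
  {
          uevent_handler(0);
          printf("%d log lines from a single event\n", log_lines);
          return 0;
  }

Dropping the debug prints breaks the cycle: reading the uevent file still runs the handler, but the handler no longer emits log lines for the consumer to react to.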
Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: linux-pm@vger.kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_sysfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index dce24f5961609..5358a80d854f9 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -383,15 +383,11 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) char *prop_buf; char *attrname; - dev_dbg(dev, "uevent\n"); - if (!psy || !psy->desc) { dev_dbg(dev, "No power supply yet\n"); return ret; } - dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->desc->name); - ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->desc->name); if (ret) return ret; @@ -427,8 +423,6 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env) goto out; } - dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf); - ret = add_uevent_var(env, "POWER_SUPPLY_%s=%s", attrname, prop_buf); kfree(attrname); if (ret) -- GitLab From 4ee0776760af03f181e6b80baf5fb1cc1a980f50 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 24 Apr 2019 09:32:55 -0700 Subject: [PATCH 1566/1861] selftests/seccomp: Prepare for exclusive seccomp flags Some seccomp flags will become exclusive, so the selftest needs to be adjusted to mask those out and test them individually for the "all flags" tests. Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Kees Cook Reviewed-by: Tycho Andersen Acked-by: James Morris --- tools/testing/selftests/seccomp/seccomp_bpf.c | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index f69d2ee297428..5019cdae5d0b8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2166,11 +2166,14 @@ TEST(detect_seccomp_filter_flags) SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_NEW_LISTENER }; - unsigned int flag, all_flags; + unsigned int exclusive[] = { + SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_NEW_LISTENER }; + unsigned int flag, all_flags, exclusive_mask; int i; long ret; - /* Test detection of known-good filter flags */ + /* Test detection of individual known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { int bits = 0; @@ -2197,16 +2200,29 @@ TEST(detect_seccomp_filter_flags) all_flags |= flag; } - /* Test detection of all known-good filter flags */ - ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); - EXPECT_EQ(-1, ret); - EXPECT_EQ(EFAULT, errno) { - TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", - all_flags); + /* + * Test detection of all known-good filter flags combined. But + * for the exclusive flags we need to mask them out and try them + * individually for the "all flags" testing. + */ + exclusive_mask = 0; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) + exclusive_mask |= exclusive[i]; + for (i = 0; i < ARRAY_SIZE(exclusive); i++) { + flag = all_flags & ~exclusive_mask; + flag |= exclusive[i]; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + flag); + } } - /* Test detection of an unknown filter flag */ + /* Test detection of an unknown filter flags, without exclusives. 
*/ flag = -1; + flag &= ~exclusive_mask; ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); EXPECT_EQ(-1, ret); EXPECT_EQ(EINVAL, errno) { -- GitLab From 7a0df7fbc14505e2e2be19ed08654a09e1ed5bf6 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 6 Mar 2019 13:14:13 -0700 Subject: [PATCH 1567/1861] seccomp: Make NEW_LISTENER and TSYNC flags exclusive As the comment notes, the return codes for TSYNC and NEW_LISTENER conflict, because they both return positive values, one in the case of success and one in the case of error. So, let's disallow both of these flags together. While this is technically a userspace break, all the users I know of are still waiting on me to land this feature in libseccomp, so I think it'll be safe. Also, at present my use case doesn't require TSYNC at all, so this isn't a big deal to disallow. If someone wanted to support this, a path forward would be to add a new flag like TSYNC_AND_LISTENER_YES_I_UNDERSTAND_THAT_TSYNC_WILL_JUST_RETURN_EAGAIN, but the use cases are so different I don't see it really happening. Finally, it's worth noting that this does actually fix a UAF issue: at the end of seccomp_set_mode_filter(), we have: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { if (ret < 0) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); } else { fd_install(listener, listener_f); ret = listener; } } out_free: seccomp_filter_free(prepared); But if ret > 0 because TSYNC raced, we'll install the listener fd and then free the filter out from underneath it, causing a UAF when the task closes it or dies. This patch also switches the condition to be simply if (ret), so that if someone does add the flag mentioned above, they won't have to remember to fix this too. Reported-by: syzbot+b562969adb2e04af3442@syzkaller.appspotmail.com Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") CC: stable@vger.kernel.org # v5.0+ Signed-off-by: Tycho Andersen Signed-off-by: Kees Cook Acked-by: James Morris --- kernel/seccomp.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54a0347ca8128..4b51d9cbcf061 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -502,7 +502,10 @@ out: * * Caller must be holding current->sighand->siglock lock. * - * Returns 0 on success, -ve on error. + * Returns 0 on success, -ve on error, or + * - in TSYNC mode: the pid of a thread which was either not in the correct + * seccomp mode or did not have an ancestral seccomp filter + * - in NEW_LISTENER mode: the fd of the new listener */ static long seccomp_attach_filter(unsigned int flags, struct seccomp_filter *filter) @@ -1258,6 +1261,16 @@ static long seccomp_set_mode_filter(unsigned int flags, if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; + /* + * In the successful case, NEW_LISTENER returns the new listener fd. + * But in the failure case, TSYNC returns the thread that died. If you + * combine these two flags, there's no way to tell whether something + * succeeded or failed. So, let's disallow this combination. + */ + if ((flags & SECCOMP_FILTER_FLAG_TSYNC) && + (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER)) + return -EINVAL; + /* Prepare the new filter before holding any locks. 
*/ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) @@ -1304,7 +1317,7 @@ out: mutex_unlock(¤t->signal->cred_guard_mutex); out_put_fd: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { - if (ret < 0) { + if (ret) { listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); -- GitLab From ecfc3fcabbb5291d1e61600a3dac6cdbfdb04cb1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 10 Apr 2019 21:49:23 +0800 Subject: [PATCH 1568/1861] MIPS: eBPF: Make ebpf_to_mips_reg() static Fix sparse warning: arch/mips/net/ebpf_jit.c:196:5: warning: symbol 'ebpf_to_mips_reg' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: YueHaibing Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/mips/net/ebpf_jit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 0effd3cba9a73..98bf0c222b5fe 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -186,8 +186,9 @@ enum which_ebpf_reg { * separate frame pointer, so BPF_REG_10 relative accesses are * adjusted to be $sp relative. */ -int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn, - enum which_ebpf_reg w) +static int ebpf_to_mips_reg(struct jit_ctx *ctx, + const struct bpf_insn *insn, + enum which_ebpf_reg w) { int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? insn->src_reg : insn->dst_reg; -- GitLab From 8694d8c1f82cccec9380e0d3720b84eee315dfb7 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Fri, 12 Apr 2019 14:40:50 +0200 Subject: [PATCH 1569/1861] tools: bpftool: fix infinite loop in map create "bpftool map create" has an infinite loop on "while (argc)". The error case is missing. Symptoms: when forgetting to type the keyword 'type' in front of 'hash': $ sudo bpftool map create /sys/fs/bpf/dir/foobar hash key 8 value 8 entries 128 (infinite loop, taking all the CPU) ^C After the patch: $ sudo bpftool map create /sys/fs/bpf/dir/foobar hash key 8 value 8 entries 128 Error: unknown arg hash Fixes: 0b592b5a01be ("tools: bpftool: add map create command") Signed-off-by: Alban Crequy Reviewed-by: Quentin Monnet Acked-by: Song Liu Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e0c650d91784a..994a7e0d16fb5 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1151,6 +1151,9 @@ static int do_create(int argc, char **argv) return -1; } NEXT_ARG(); + } else { + p_err("unknown arg %s", *argv); + return -1; } } -- GitLab From 39391377f8ecf2fa4569e2fede624dc091bcd859 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 13 Apr 2019 03:37:32 +0200 Subject: [PATCH 1570/1861] libbpf: add binary to gitignore Some binaries are generated when building libbpf from tools/lib/bpf/, namely libbpf.so.0.0.2 and libbpf.so.0. Add them to the local .gitignore. 
Signed-off-by: Matteo Croce Reviewed-by: Jakub Kicinski Acked-by: Song Liu Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 4db74758c6743..fecb78afea3fe 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,3 +1,4 @@ libbpf_version.h FEATURE-DUMP.libbpf test_libbpf +libbpf.so.* -- GitLab From c6a9efa1d8353d8960d152e7d469d952b01495c0 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 24 Apr 2019 21:50:42 +0200 Subject: [PATCH 1571/1861] bpf: mark registers in all frames after pkt/null checks In case of a null check on a pointer inside a subprog, we should mark all registers with this pointer as either safe or unknown, in both the current and previous frames. Currently, only spilled registers and registers in the current frame are marked. Packet bound checks in subprogs have the same issue. This patch fixes it to mark registers in previous frames as well. A good reproducer for null checks looks as follow: 1: ptr = bpf_map_lookup_elem(map, &key); 2: ret = subprog(ptr) { 3: return ptr != NULL; 4: } 5: if (ret) 6: value = *ptr; With the above, the verifier will complain on line 6 because it sees ptr as map_value_or_null despite the null check in subprog 1. Note that this patch fixes another resulting bug when using bpf_sk_release(): 1: sk = bpf_sk_lookup_tcp(...); 2: subprog(sk) { 3: if (sk) 4: bpf_sk_release(sk); 5: } 6: if (!sk) 7: return 0; 8: return 1; In the above, mark_ptr_or_null_regs will warn on line 6 because it will try to free the reference state, even though it was already freed on line 3. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 76 ++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6c5a41f7f3385..09d5d972c9ff2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4138,15 +4138,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static void __find_good_pkt_pointers(struct bpf_func_state *state, + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type, u16 new_range) +{ + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) { + reg = &state->regs[i]; + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + } + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + if (reg->type == type && reg->id == dst_reg->id) + reg->range = max(reg->range, new_range); + } +} + static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs, *reg; u16 new_range; - int i, j; + int i; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -4211,20 +4231,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 
*/ - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == type && regs[i].id == dst_reg->id) - /* keep the maximum range already checked */ - regs[i].range = max(regs[i].range, new_range); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } - } + for (i = 0; i <= vstate->curframe; i++) + __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, + new_range); } /* compute branch direction of the expression "if (reg opcode val) goto target;" @@ -4698,6 +4707,22 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } +static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, + bool is_null) +{ + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + mark_ptr_or_null_reg(state, reg, id, is_null); + } +} + /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@ -4705,10 +4730,10 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *reg, *regs = state->regs; + struct bpf_reg_state *regs = state->regs; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i, j; + int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@ -4717,17 +4742,8 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, ®s[i], id, is_null); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } - } + for (i = 0; i <= vstate->curframe; i++) + __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, -- GitLab From 6dd7f14080473b655c247863e61b7c34424f0c83 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 24 Apr 2019 21:51:26 +0200 Subject: [PATCH 1572/1861] selftests/bpf: test cases for pkt/null checks in subprogs The first test case, for pointer null checks, is equivalent to the following pseudo-code. It checks that the verifier does not complain on line 6 and recognizes that ptr isn't null. 1: ptr = bpf_map_lookup_elem(map, &key); 2: ret = subprog(ptr) { 3: return ptr != NULL; 4: } 5: if (ret) 6: value = *ptr; The second test case, for packet bound checks, is equivalent to the following pseudo-code. It checks that the verifier does not complain on line 7 and recognizes that the packet is at least 1 byte long. 
1: pkt_end = ctx.pkt_end; 2: ptr = ctx.pkt + 8; 3: ret = subprog(ptr, pkt_end) { 4: return ptr <= pkt_end; 5: } 6: if (ret) 7: value = *(u8 *)ctx.pkt; Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/calls.c | 25 +++++++++++++++++++ .../bpf/verifier/direct_packet_access.c | 22 ++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index fb11240b758b1..9093a8f64dc61 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -374,6 +374,31 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "calls: ptr null check in subprog", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .fixup_map_hash_48b = { 3 }, + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 0, +}, { "calls: two calls with args", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index e3fc22e672c27..d5c596fdc4b9a 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -631,3 +631,25 @@ .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "direct packet access: test29 (reg > pkt_end in subprog)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, -- GitLab From 0edd6b64d1939e9e9168ff27947995bb7751db5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Apr 2019 21:55:59 +0200 Subject: [PATCH 1573/1861] bpf: Fix preempt_enable_no_resched() abuse Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. 
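A stand-alone illustration of that invariant; the preempt macros are stubbed with printfs here so the file compiles outside the kernel (in-kernel they come from <linux/preempt.h>).

  #include <stdio.h>

  #define preempt_disable()           printf("preempt_disable\n")
  #define preempt_enable()            printf("preempt_enable (may reschedule)\n")
  #define preempt_enable_no_resched() printf("preempt_enable_no_resched\n")
  #define schedule()                  printf("schedule\n")

  static void ok_pattern(void)
  {
          preempt_disable();
          /* ... critical section ... */
          preempt_enable_no_resched();
          schedule(); /* any pending preemption is honoured here */
  }

  static void broken_pattern(void)
  {
          preempt_disable();
          /* ... critical section ... */
          preempt_enable_no_resched();
          /* no schedule(): a needed reschedule can be delayed for an
           * arbitrarily long time, violating the PREEMPT=y invariant */
  }

  int main(void)
  {
          ok_pattern();
          broken_pattern();
          return 0;
  }

This is why the patch below switches the BPF program-run macro in include/linux/bpf.h to plain preempt_enable(): nothing at that point implies an immediate schedule().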
Cc: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f02367faa58db..944ccc310201d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -510,7 +510,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, } \ _out: \ rcu_read_unlock(); \ - preempt_enable_no_resched(); \ + preempt_enable(); \ _ret; \ }) -- GitLab From 3db6d5a5ecaf0a778d721ccf9809248350d4bfaf Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 16:01:43 -0700 Subject: [PATCH 1574/1861] x86/mm/tlb: Remove 'struct flush_tlb_info' from the stack Move flush_tlb_info variables off the stack. This allows flush_tlb_info to be cache-line aligned and avoids potentially unnecessary cache-line movements. It also allows a fixed virtual-to-physical translation of the variables, which reduces TLB misses. Use a per-CPU struct for flush_tlb_mm_range() and flush_tlb_kernel_range(). Add debug assertions to ensure there are no nested TLB flushes that might overwrite the per-CPU data. For arch_tlbbatch_flush() use a const struct. Results when running a microbenchmark that performs 10^6 MADV_DONTNEED operations and touches a page, while 3 additional threads run a busy-wait loop (5 runs, PTI and retpolines are turned off): base off-stack ---- --------- avg (usec/op) 1.629 1.570 (-3%) stddev 0.014 0.009 Signed-off-by: Nadav Amit Acked-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190425230143.7008-1-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 116 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 34 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 487b8474c01cd..7f61431c75fb7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -634,7 +634,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen); } -static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason) +static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason) { const struct flush_tlb_info *f = info; @@ -722,43 +722,81 @@ void native_flush_tlb_others(const struct cpumask *cpumask, */ unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info); + +#ifdef CONFIG_DEBUG_VM +static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx); +#endif + +static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int stride_shift, bool freed_tables, + u64 new_tlb_gen) +{ + struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info); + +#ifdef CONFIG_DEBUG_VM + /* + * Ensure that the following code is non-reentrant and flush_tlb_info + * is not overwritten. This means no TLB flushing is initiated by + * interrupt handlers and machine-check exception handlers.
+ */ + BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1); +#endif + + info->start = start; + info->end = end; + info->mm = mm; + info->stride_shift = stride_shift; + info->freed_tables = freed_tables; + info->new_tlb_gen = new_tlb_gen; + + return info; +} + +static inline void put_flush_tlb_info(void) +{ +#ifdef CONFIG_DEBUG_VM + /* Complete reentrancy prevention checks */ + barrier(); + this_cpu_dec(flush_tlb_info_idx); +#endif +} + void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int stride_shift, bool freed_tables) { + struct flush_tlb_info *info; + u64 new_tlb_gen; int cpu; - struct flush_tlb_info info = { - .mm = mm, - .stride_shift = stride_shift, - .freed_tables = freed_tables, - }; - cpu = get_cpu(); - /* This is also a barrier that synchronizes with switch_mm(). */ - info.new_tlb_gen = inc_mm_tlb_gen(mm); - /* Should we flush just the requested range? */ - if ((end != TLB_FLUSH_ALL) && - ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) { - info.start = start; - info.end = end; - } else { - info.start = 0UL; - info.end = TLB_FLUSH_ALL; + if ((end == TLB_FLUSH_ALL) || + ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { + start = 0; + end = TLB_FLUSH_ALL; } + /* This is also a barrier that synchronizes with switch_mm(). */ + new_tlb_gen = inc_mm_tlb_gen(mm); + + info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables, + new_tlb_gen); + if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { - VM_WARN_ON(irqs_disabled()); + lockdep_assert_irqs_enabled(); local_irq_disable(); - flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN); + flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN); local_irq_enable(); } if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), &info); + flush_tlb_others(mm_cpumask(mm), info); + put_flush_tlb_info(); put_cpu(); } @@ -787,38 +825,48 @@ static void do_kernel_range_flush(void *info) void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - /* Balance as user space task's flush, a bit conservative */ if (end == TLB_FLUSH_ALL || (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { on_each_cpu(do_flush_tlb_all, NULL, 1); } else { - struct flush_tlb_info info; - info.start = start; - info.end = end; - on_each_cpu(do_kernel_range_flush, &info, 1); + struct flush_tlb_info *info; + + preempt_disable(); + info = get_flush_tlb_info(NULL, start, end, 0, false, 0); + + on_each_cpu(do_kernel_range_flush, info, 1); + + put_flush_tlb_info(); + preempt_enable(); } } +/* + * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm. + * This means that the 'struct flush_tlb_info' that describes which mappings to + * flush is actually fixed. We therefore set a single fixed struct and use it in + * arch_tlbbatch_flush().
+ */ +static const struct flush_tlb_info full_flush_tlb_info = { + .mm = NULL, + .start = 0, + .end = TLB_FLUSH_ALL, +}; + void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { - struct flush_tlb_info info = { - .mm = NULL, - .start = 0UL, - .end = TLB_FLUSH_ALL, - }; - int cpu = get_cpu(); if (cpumask_test_cpu(cpu, &batch->cpumask)) { - VM_WARN_ON(irqs_disabled()); + lockdep_assert_irqs_enabled(); local_irq_disable(); - flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN); + flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN); local_irq_enable(); } if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) - flush_tlb_others(&batch->cpumask, &info); + flush_tlb_others(&batch->cpumask, &full_flush_tlb_info); cpumask_clear(&batch->cpumask); -- GitLab From 86c74d869d321bee4753dc3f8c3d1c3809d8ed8a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Feb 2019 14:08:26 +0100 Subject: [PATCH 1575/1861] s390/ipl: make ipl_info less confusing The ipl_info union in struct ipl_parameter_block has the same name as the struct ipl_info. This does not help while reading the code and the union in struct ipl_parameter_block does not need to be named. Drop the name from the union. Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 14 ++--- arch/s390/include/asm/ipl.h | 2 +- arch/s390/kernel/ipl.c | 110 +++++++++++++++++----------------- arch/s390/kernel/ipl_vmparm.c | 8 +-- 4 files changed, 66 insertions(+), 68 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 1900670a83fdc..849cf786db91c 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -71,26 +71,26 @@ static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, size_t i; int has_lowercase; - count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, - ipb->ipl_info.fcp.scp_data_len)); + count = min(size - 1, scpdata_length(ipb->fcp.scp_data, + ipb->fcp.scp_data_len)); if (!count) goto out; has_lowercase = 0; for (i = 0; i < count; i++) { - if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + if (!isascii(ipb->fcp.scp_data[i])) { count = 0; goto out; } - if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) + if (!has_lowercase && islower(ipb->fcp.scp_data[i])) has_lowercase = 1; } if (has_lowercase) - memcpy(dest, ipb->ipl_info.fcp.scp_data, count); + memcpy(dest, ipb->fcp.scp_data, count); else for (i = 0; i < count; i++) - dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); + dest[i] = tolower(ipb->fcp.scp_data[i]); out: dest[count] = '\0'; return count; @@ -239,7 +239,7 @@ void setup_memory_end(void) #ifdef CONFIG_CRASH_DUMP if (!OLDMEM_BASE && ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP && - ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) { + ipl_block.fcp.opt == DIAG308_IPL_OPT_DUMP) { if (!sclp_early_get_hsa_size(&memory_end) && memory_end) memory_end_set = 1; } diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 6403cc9952bff..3530b613234f1 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -78,7 +78,7 @@ struct ipl_parameter_block { struct ipl_block_fcp fcp; struct ipl_block_ccw ccw; char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)]; - } ipl_info; + }; } __packed __aligned(PAGE_SIZE); struct save_area; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index cffe3374d201d..d452f403a4293 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -248,7 +248,7 @@ static __init enum ipl_type get_ipl_type(void) 
case DIAG308_IPL_TYPE_CCW: return IPL_TYPE_CCW; case DIAG308_IPL_TYPE_FCP: - if (ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) + if (ipl_block.fcp.opt == DIAG308_IPL_OPT_DUMP) return IPL_TYPE_FCP_DUMP; else return IPL_TYPE_FCP; @@ -285,12 +285,11 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, { switch (ipl_info.type) { case IPL_TYPE_CCW: - return sprintf(page, "0.%x.%04x\n", ipl_block.ipl_info.ccw.ssid, - ipl_block.ipl_info.ccw.devno); + return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid, + ipl_block.ccw.devno); case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: - return sprintf(page, "0.0.%04x\n", - ipl_block.ipl_info.fcp.devno); + return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); default: return 0; } @@ -314,8 +313,8 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - unsigned int size = ipl_block.ipl_info.fcp.scp_data_len; - void *scp_data = &ipl_block.ipl_info.fcp.scp_data; + unsigned int size = ipl_block.fcp.scp_data_len; + void *scp_data = &ipl_block.fcp.scp_data; return memory_read_from_buffer(buf, count, &off, scp_data, size); } @@ -331,13 +330,13 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { /* FCP ipl device attributes */ DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", - (unsigned long long)ipl_block.ipl_info.fcp.wwpn); + (unsigned long long)ipl_block.fcp.wwpn); DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", - (unsigned long long)ipl_block.ipl_info.fcp.lun); + (unsigned long long)ipl_block.fcp.lun); DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", - (unsigned long long)ipl_block.ipl_info.fcp.bootprog); + (unsigned long long)ipl_block.fcp.bootprog); DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", - (unsigned long long)ipl_block.ipl_info.fcp.br_lba); + (unsigned long long)ipl_block.fcp.br_lba); static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -493,14 +492,14 @@ static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))) return -EINVAL; - memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); - ipb->ipl_info.ccw.vm_parm_len = ip_len; + memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); + ipb->ccw.vm_parm_len = ip_len; if (ip_len > 0) { - ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; - memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len); - ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len); + ipb->ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + memcpy(ipb->ccw.vm_parm, buf, ip_len); + ASCEBC(ipb->ccw.vm_parm, ip_len); } else { - ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; + ipb->ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; } return len; @@ -547,8 +546,8 @@ static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; - void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; + size_t size = reipl_block_fcp->fcp.scp_data_len; + void *scp_data = reipl_block_fcp->fcp.scp_data; return memory_read_from_buffer(buf, count, &off, scp_data, size); } @@ -564,15 +563,15 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, if (off) return -EINVAL; - memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count); + memcpy(reipl_block_fcp->fcp.scp_data, buf, count); if (scpdata_len % 8) { padding = 8 - (scpdata_len % 8); - memset(reipl_block_fcp->ipl_info.fcp.scp_data + 
scpdata_len, + memset(reipl_block_fcp->fcp.scp_data + scpdata_len, 0, padding); scpdata_len += padding; } - reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len; + reipl_block_fcp->fcp.scp_data_len = scpdata_len; reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; @@ -588,15 +587,15 @@ static struct bin_attribute *reipl_fcp_bin_attrs[] = { }; DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", - reipl_block_fcp->ipl_info.fcp.wwpn); + reipl_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", - reipl_block_fcp->ipl_info.fcp.lun); + reipl_block_fcp->fcp.lun); DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", - reipl_block_fcp->ipl_info.fcp.bootprog); + reipl_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", - reipl_block_fcp->ipl_info.fcp.br_lba); + reipl_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", - reipl_block_fcp->ipl_info.fcp.devno); + reipl_block_fcp->fcp.devno); static void reipl_get_ascii_loadparm(char *loadparm, struct ipl_parameter_block *ibp) @@ -678,7 +677,7 @@ static struct attribute_group reipl_fcp_attr_group = { }; /* CCW reipl device attributes */ -DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw); +DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); /* NSS wrapper */ static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, @@ -740,7 +739,7 @@ static struct attribute_group reipl_ccw_attr_group_lpar = { static void reipl_get_ascii_nss_name(char *dst, struct ipl_parameter_block *ipb) { - memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE); EBCASC(dst, NSS_NAME_SIZE); dst[NSS_NAME_SIZE] = 0; } @@ -768,15 +767,15 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj, if (nss_len > NSS_NAME_SIZE) return -EINVAL; - memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE); + memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE); if (nss_len > 0) { - reipl_block_nss->ipl_info.ccw.vm_flags |= + reipl_block_nss->ccw.vm_flags |= DIAG308_VM_FLAGS_NSS_VALID; - memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len); - ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); - EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len); + ASCEBC(reipl_block_nss->ccw.nss_name, nss_len); + EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len); } else { - reipl_block_nss->ipl_info.ccw.vm_flags &= + reipl_block_nss->ccw.vm_flags &= ~DIAG308_VM_FLAGS_NSS_VALID; } @@ -916,13 +915,12 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) /* VM PARM */ if (MACHINE_IS_VM && ipl_block_valid && - (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { + (ipl_block.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { - ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; - ipb->ipl_info.ccw.vm_parm_len = - ipl_block.ipl_info.ccw.vm_parm_len; - memcpy(ipb->ipl_info.ccw.vm_parm, - ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE); + ipb->ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len; + memcpy(ipb->ccw.vm_parm, + ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE); } } @@ -962,8 +960,8 @@ static int __init reipl_ccw_init(void) reipl_block_ccw_init(reipl_block_ccw); if (ipl_info.type == IPL_TYPE_CCW) { - reipl_block_ccw->ipl_info.ccw.ssid = 
ipl_block.ipl_info.ccw.ssid; - reipl_block_ccw->ipl_info.ccw.devno = ipl_block.ipl_info.ccw.devno; + reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid; + reipl_block_ccw->ccw.devno = ipl_block.ccw.devno; reipl_block_ccw_fill_parms(reipl_block_ccw); } @@ -1008,7 +1006,7 @@ static int __init reipl_fcp_init(void) reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; - reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL; + reipl_block_fcp->fcp.opt = DIAG308_IPL_OPT_IPL; } reipl_capabilities |= IPL_TYPE_FCP; return 0; @@ -1074,15 +1072,15 @@ static struct shutdown_action __refdata reipl_action = { /* FCP dump device attributes */ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", - dump_block_fcp->ipl_info.fcp.wwpn); + dump_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", - dump_block_fcp->ipl_info.fcp.lun); + dump_block_fcp->fcp.lun); DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", - dump_block_fcp->ipl_info.fcp.bootprog); + dump_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", - dump_block_fcp->ipl_info.fcp.br_lba); + dump_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", - dump_block_fcp->ipl_info.fcp.devno); + dump_block_fcp->fcp.devno); static struct attribute *dump_fcp_attrs[] = { &sys_dump_fcp_device_attr.attr, @@ -1099,7 +1097,7 @@ static struct attribute_group dump_fcp_attr_group = { }; /* CCW dump device attributes */ -DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw); +DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw); static struct attribute *dump_ccw_attrs[] = { &sys_dump_ccw_device_attr.attr, @@ -1219,7 +1217,7 @@ static int __init dump_fcp_init(void) dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; - dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP; + dump_block_fcp->fcp.opt = DIAG308_IPL_OPT_DUMP; dump_capabilities |= DUMP_TYPE_FCP; return 0; } @@ -1663,15 +1661,15 @@ void __init setup_ipl(void) ipl_info.type = get_ipl_type(); switch (ipl_info.type) { case IPL_TYPE_CCW: - ipl_info.data.ccw.dev_id.ssid = ipl_block.ipl_info.ccw.ssid; - ipl_info.data.ccw.dev_id.devno = ipl_block.ipl_info.ccw.devno; + ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid; + ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno; break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: ipl_info.data.fcp.dev_id.ssid = 0; - ipl_info.data.fcp.dev_id.devno = ipl_block.ipl_info.fcp.devno; - ipl_info.data.fcp.wwpn = ipl_block.ipl_info.fcp.wwpn; - ipl_info.data.fcp.lun = ipl_block.ipl_info.fcp.lun; + ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno; + ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn; + ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; case IPL_TYPE_NSS: case IPL_TYPE_UNKNOWN: diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c index 411838c0a0af9..41613c1617ffc 100644 --- a/arch/s390/kernel/ipl_vmparm.c +++ b/arch/s390/kernel/ipl_vmparm.c @@ -11,11 +11,11 @@ size_t ipl_block_get_ascii_vmparm(char *dest, size_t size, char has_lowercase = 0; len = 0; - if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && - (ipb->ipl_info.ccw.vm_parm_len > 0)) { + if ((ipb->ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && + (ipb->ccw.vm_parm_len > 0)) { - len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); - 
memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); + len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len); + memcpy(dest, ipb->ccw.vm_parm, len); /* If at least one character is lowercase, we assume mixed * case; otherwise we convert everything to lowercase. */ -- GitLab From 5f1207fbe74450eb887155ba432bfc079312b0fe Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Feb 2019 14:26:51 +0100 Subject: [PATCH 1576/1861] s390/ipl: provide uapi header for list directed IPL The IPL parameter block is used as an interface between Linux and the machine to query and change the boot device and boot options. To be able to create an IPL parameter block in user space and pass it as a segment to kexec, provide a uapi header with proper structure definitions for the block. Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 10 +-- arch/s390/include/asm/ipl.h | 103 +++++++------------------------ arch/s390/include/uapi/asm/ipl.h | 94 ++++++++++++++++++++++++++++ arch/s390/kernel/ipl.c | 80 ++++++++++++------------ arch/s390/kernel/ipl_vmparm.c | 2 +- 5 files changed, 161 insertions(+), 128 deletions(-) create mode 100644 arch/s390/include/uapi/asm/ipl.h diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 849cf786db91c..96777092fb14d 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -106,12 +106,12 @@ static void append_ipl_block_parm(void) delim = early_command_line + len; /* '\0' character position */ parm = early_command_line + len + 1; /* append right after '\0' */ - switch (ipl_block.hdr.pbt) { - case DIAG308_IPL_TYPE_CCW: + switch (ipl_block.pb0_hdr.pbt) { + case IPL_PBT_CCW: rc = ipl_block_get_ascii_vmparm( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; - case DIAG308_IPL_TYPE_FCP: + case IPL_PBT_FCP: rc = ipl_block_get_ascii_scpdata( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; @@ -238,8 +238,8 @@ void setup_memory_end(void) { #ifdef CONFIG_CRASH_DUMP if (!OLDMEM_BASE && ipl_block_valid && - ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP && - ipl_block.fcp.opt == DIAG308_IPL_OPT_DUMP) { + ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) { if (!sclp_early_get_hsa_size(&memory_end) && memory_end) memory_end_set = 1; } diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 3530b613234f1..878f6fd5f2e74 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -12,74 +12,34 @@ #include #include #include +#include -#define NSS_NAME_SIZE 8 - -#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \ - sizeof(struct ipl_block_fcp)) - -#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16) +struct ipl_parameter_block { + struct ipl_pl_hdr hdr; + union { + struct ipl_pb_hdr pb0_hdr; + struct ipl_pb0_common common; + struct ipl_pb0_fcp fcp; + struct ipl_pb0_ccw ccw; + char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)]; + }; +} __packed __aligned(PAGE_SIZE); -#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \ - sizeof(struct ipl_block_ccw)) +#define NSS_NAME_SIZE 8 -#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16) +#define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \ + sizeof(struct ipl_pb0_fcp)) +#define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp)) +#define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \ + sizeof(struct ipl_pb0_ccw)) +#define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw)) #define IPL_MAX_SUPPORTED_VERSION (0) -struct ipl_list_hdr { - u32 len; - u8 reserved1[3]; - u8 version; - u32 blk0_len; - u8 pbt; - u8 
flags; - u16 reserved2; - u8 loadparm[8]; -} __attribute__((packed)); - -struct ipl_block_fcp { - u8 reserved1[305-1]; - u8 opt; - u8 reserved2[3]; - u16 reserved3; - u16 devno; - u8 reserved4[4]; - u64 wwpn; - u64 lun; - u32 bootprog; - u8 reserved5[12]; - u64 br_lba; - u32 scp_data_len; - u8 reserved6[260]; - u8 scp_data[]; -} __attribute__((packed)); - -#define DIAG308_VMPARM_SIZE 64 -#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \ - offsetof(struct ipl_block_fcp, scp_data))) - -struct ipl_block_ccw { - u8 reserved1[84]; - u16 reserved2 : 13; - u8 ssid : 3; - u16 devno; - u8 vm_flags; - u8 reserved3[3]; - u32 vm_parm_len; - u8 nss_name[8]; - u8 vm_parm[DIAG308_VMPARM_SIZE]; - u8 reserved4[8]; -} __attribute__((packed)); - -struct ipl_parameter_block { - struct ipl_list_hdr hdr; - union { - struct ipl_block_fcp fcp; - struct ipl_block_ccw ccw; - char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)]; - }; -} __packed __aligned(PAGE_SIZE); +#define DIAG308_VMPARM_SIZE (64) +#define DIAG308_SCPDATA_OFFSET offsetof(struct ipl_parameter_block, \ + fcp.scp_data) +#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - DIAG308_SCPDATA_OFFSET) struct save_area; struct save_area * __init save_area_alloc(bool is_boot_cpu); @@ -132,25 +92,6 @@ enum diag308_subcode { DIAG308_STORE = 6, }; -enum diag308_ipl_type { - DIAG308_IPL_TYPE_FCP = 0, - DIAG308_IPL_TYPE_CCW = 2, -}; - -enum diag308_opt { - DIAG308_IPL_OPT_IPL = 0x10, - DIAG308_IPL_OPT_DUMP = 0x20, -}; - -enum diag308_flags { - DIAG308_FLAGS_LP_VALID = 0x80, -}; - -enum diag308_vm_flags { - DIAG308_VM_FLAGS_NSS_VALID = 0x80, - DIAG308_VM_FLAGS_VP_VALID = 0x40, -}; - enum diag308_rc { DIAG308_RC_OK = 0x0001, DIAG308_RC_NOCONFIG = 0x0102, diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h new file mode 100644 index 0000000000000..3b513b39fca05 --- /dev/null +++ b/arch/s390/include/uapi/asm/ipl.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_UAPI_IPL_H +#define _ASM_S390_UAPI_IPL_H + +#include + +/* IPL Parameter List header */ +struct ipl_pl_hdr { + __u32 len; + __u8 reserved1[3]; + __u8 version; +} __packed; + +/* IPL Parameter Block header */ +struct ipl_pb_hdr { + __u32 len; + __u8 pbt; +} __packed; + +/* IPL Parameter Block types */ +enum ipl_pbt { + IPL_PBT_FCP = 0, + IPL_PBT_SCP_DATA = 1, + IPL_PBT_CCW = 2, +}; + +/* IPL Parameter Block 0 with common fields */ +struct ipl_pb0_common { + __u32 len; + __u8 pbt; + __u8 flags; + __u8 reserved1[2]; + __u8 loadparm[8]; + __u8 reserved2[84]; +} __packed; + +#define IPL_PB0_FLAG_LOADPARM 0x80 + +/* IPL Parameter Block 0 for FCP */ +struct ipl_pb0_fcp { + __u32 len; + __u8 pbt; + __u8 reserved1[3]; + __u8 loadparm[8]; + __u8 reserved2[304]; + __u8 opt; + __u8 reserved3[3]; + __u8 cssid; + __u8 reserved4[1]; + __u16 devno; + __u8 reserved5[4]; + __u64 wwpn; + __u64 lun; + __u32 bootprog; + __u8 reserved6[12]; + __u64 br_lba; + __u32 scp_data_len; + __u8 reserved7[260]; + __u8 scp_data[]; +} __packed; + +#define IPL_PB0_FCP_OPT_IPL 0x10 +#define IPL_PB0_FCP_OPT_DUMP 0x20 + +/* IPL Parameter Block 0 for CCW */ +struct ipl_pb0_ccw { + __u32 len; + __u8 pbt; + __u8 flags; + __u8 reserved1[2]; + __u8 loadparm[8]; + __u8 reserved2[84]; + __u16 reserved3 : 13; + __u8 ssid : 3; + __u16 devno; + __u8 vm_flags; + __u8 reserved4[3]; + __u32 vm_parm_len; + __u8 nss_name[8]; + __u8 vm_parm[64]; + __u8 reserved5[8]; +} __packed; + +#define IPL_PB0_CCW_VM_FLAG_NSS 0x80 +#define IPL_PB0_CCW_VM_FLAG_VP 0x40 + +/* IPL Parameter Block 1 for additional 
SCP data */ +struct ipl_pb1_scp_data { + __u32 len; + __u8 pbt; + __u8 scp_data[]; +} __packed; + +#endif diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d452f403a4293..f9718bc67cd4e 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -244,11 +244,11 @@ static __init enum ipl_type get_ipl_type(void) if (!ipl_block_valid) return IPL_TYPE_UNKNOWN; - switch (ipl_block.hdr.pbt) { - case DIAG308_IPL_TYPE_CCW: + switch (ipl_block.pb0_hdr.pbt) { + case IPL_PBT_CCW: return IPL_TYPE_CCW; - case DIAG308_IPL_TYPE_FCP: - if (ipl_block.fcp.opt == DIAG308_IPL_OPT_DUMP) + case IPL_PBT_FCP: + if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) return IPL_TYPE_FCP_DUMP; else return IPL_TYPE_FCP; @@ -272,7 +272,7 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj, { char parm[DIAG308_VMPARM_SIZE + 1] = {}; - if (ipl_block_valid && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) + if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW)) ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block); return sprintf(page, "%s\n", parm); } @@ -495,11 +495,11 @@ static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); ipb->ccw.vm_parm_len = ip_len; if (ip_len > 0) { - ipb->ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; memcpy(ipb->ccw.vm_parm, buf, ip_len); ASCEBC(ipb->ccw.vm_parm, ip_len); } else { - ipb->ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; + ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP; } return len; @@ -571,9 +571,9 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, scpdata_len += padding; } + reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len; + reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len; reipl_block_fcp->fcp.scp_data_len = scpdata_len; - reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; - reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; return count; } @@ -600,7 +600,7 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", static void reipl_get_ascii_loadparm(char *loadparm, struct ipl_parameter_block *ibp) { - memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN); + memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); loadparm[LOADPARM_LEN] = 0; strim(loadparm); @@ -635,11 +635,11 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, return -EINVAL; } /* initialize loadparm with blanks */ - memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN); + memset(ipb->common.loadparm, ' ', LOADPARM_LEN); /* copy and convert to ebcdic */ - memcpy(ipb->hdr.loadparm, buf, lp_len); - ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN); - ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID; + memcpy(ipb->common.loadparm, buf, lp_len); + ASCEBC(ipb->common.loadparm, LOADPARM_LEN); + ipb->common.flags |= IPL_PB0_FLAG_LOADPARM; return len; } @@ -769,14 +769,12 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj, memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE); if (nss_len > 0) { - reipl_block_nss->ccw.vm_flags |= - DIAG308_VM_FLAGS_NSS_VALID; + reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS; memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len); ASCEBC(reipl_block_nss->ccw.nss_name, nss_len); EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len); } else { - reipl_block_nss->ccw.vm_flags &= - ~DIAG308_VM_FLAGS_NSS_VALID; + reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS; } return len; @@ -896,10 +894,10 @@ static void 
reipl_run(struct shutdown_trigger *trigger) static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) { - ipb->hdr.len = IPL_PARM_BLK_CCW_LEN; + ipb->hdr.len = IPL_BP_CCW_LEN; ipb->hdr.version = IPL_PARM_BLOCK_VERSION; - ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; - ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW; + ipb->pb0_hdr.len = IPL_BP0_CCW_LEN; + ipb->pb0_hdr.pbt = IPL_PBT_CCW; } static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) @@ -907,17 +905,17 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) /* LOADPARM */ /* check if read scp info worked and set loadparm */ if (sclp_ipl_info.is_valid) - memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); + memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); else /* read scp info failed: set empty loadparm (EBCDIC blanks) */ - memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN); - ipb->hdr.flags = DIAG308_FLAGS_LP_VALID; + memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN); + ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ if (MACHINE_IS_VM && ipl_block_valid && - (ipl_block.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { + (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { - ipb->ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len; memcpy(ipb->ccw.vm_parm, ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE); @@ -999,14 +997,14 @@ static int __init reipl_fcp_init(void) * is invalid in the SCSI IPL parameter block, so take it * always from sclp_ipl_info. */ - memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm, + memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm, LOADPARM_LEN); } else { - reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN; reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; - reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; - reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; - reipl_block_fcp->fcp.opt = DIAG308_IPL_OPT_IPL; + reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN; + reipl_block_fcp->fcp.pbt = IPL_PBT_FCP; + reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL; } reipl_capabilities |= IPL_TYPE_FCP; return 0; @@ -1024,10 +1022,10 @@ static int __init reipl_type_init(void) /* * If we have an OS info reipl block, this will be used */ - if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) { memcpy(reipl_block_fcp, reipl_block, size); reipl_type = IPL_TYPE_FCP; - } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { memcpy(reipl_block_ccw, reipl_block, size); reipl_type = IPL_TYPE_CCW; } @@ -1191,10 +1189,10 @@ static int __init dump_ccw_init(void) free_page((unsigned long)dump_block_ccw); return rc; } - dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; + dump_block_ccw->hdr.len = IPL_BP_CCW_LEN; dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; - dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; - dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN; + dump_block_ccw->ccw.pbt = IPL_PBT_CCW; dump_capabilities |= DUMP_TYPE_CCW; return 0; } @@ -1213,11 +1211,11 @@ static int __init dump_fcp_init(void) free_page((unsigned long)dump_block_fcp); return rc; } - dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + dump_block_fcp->hdr.len = IPL_BP_FCP_LEN; dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; - dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; - 
dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN; + dump_block_fcp->fcp.pbt = IPL_PBT_FCP; + dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP; dump_capabilities |= DUMP_TYPE_FCP; return 0; } @@ -1576,7 +1574,7 @@ static int __init s390_ipl_init(void) * READ SCP info provides the correct value. */ if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid) - memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, LOADPARM_LEN); + memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN); shutdown_actions_init(); shutdown_triggers_init(); return 0; diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c index 41613c1617ffc..af43535a976df 100644 --- a/arch/s390/kernel/ipl_vmparm.c +++ b/arch/s390/kernel/ipl_vmparm.c @@ -11,7 +11,7 @@ size_t ipl_block_get_ascii_vmparm(char *dest, size_t size, char has_lowercase = 0; len = 0; - if ((ipb->ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && + if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) && (ipb->ccw.vm_parm_len > 0)) { len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len); -- GitLab From d29af5b7a886033e6a4eb5f0a9a25cd00da63ae8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Feb 2019 16:14:16 +0100 Subject: [PATCH 1577/1861] s390/ipl: add definitions for the IPL report block To transport the information required for secure boot a new IPL report will be created at boot time. It will be written to memory right after the IPL parameter block. To work with the IPL report a couple of additional structure definitions are added to the uapi/ipl.h header. Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/ipl.h | 62 +++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index 3b513b39fca05..fd32b1cd80d29 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -7,10 +7,15 @@ /* IPL Parameter List header */ struct ipl_pl_hdr { __u32 len; - __u8 reserved1[3]; + __u8 flags; + __u8 reserved1[2]; __u8 version; } __packed; +#define IPL_PL_FLAG_IPLPS 0x80 +#define IPL_PL_FLAG_SIPL 0x40 +#define IPL_PL_FLAG_IPLSR 0x20 + /* IPL Parameter Block header */ struct ipl_pb_hdr { __u32 len; @@ -91,4 +96,59 @@ struct ipl_pb1_scp_data { __u8 scp_data[]; } __packed; +/* IPL Report List header */ +struct ipl_rl_hdr { + __u32 len; + __u8 flags; + __u8 reserved1[2]; + __u8 version; + __u8 reserved2[8]; +} __packed; + +/* IPL Report Block header */ +struct ipl_rb_hdr { + __u32 len; + __u8 rbt; + __u8 reserved1[11]; +} __packed; + +/* IPL Report Block types */ +enum ipl_rbt { + IPL_RBT_CERTIFICATES = 1, + IPL_RBT_COMPONENTS = 2, +}; + +/* IPL Report Block for the certificate list */ +struct ipl_rb_certificate_entry { + __u64 addr; + __u64 len; +} __packed; + +struct ipl_rb_certificates { + __u32 len; + __u8 rbt; + __u8 reserved1[11]; + struct ipl_rb_certificate_entry entries[]; +} __packed; + +/* IPL Report Block for the component list */ +struct ipl_rb_component_entry { + __u64 addr; + __u64 len; + __u8 flags; + __u8 reserved1[5]; + __u16 certificate_index; + __u8 reserved2[8]; }; + +#define IPL_RB_COMPONENT_FLAG_SIGNED 0x80 +#define IPL_RB_COMPONENT_FLAG_VERIFIED 0x40 + +struct ipl_rb_components { + __u32 len; + __u8 rbt; + __u8 reserved1[11]; + struct ipl_rb_component_entry entries[]; +} __packed; + #endif -- GitLab From 9641b8cc733f70a5400aa7e6831de4542c46a94c Mon Sep 17 
00:00:00 2001 From: Martin Schwidefsky Date: Thu, 21 Feb 2019 14:23:04 +0100 Subject: [PATCH 1578/1861] s390/ipl: read IPL report at early boot Read the IPL Report block provided by secure-boot, add the entries of the certificate list to the system key ring and print the list of components. PR: Adjust to Vasily's bootdata_preserved patch set. Preserve ipl_cert_list for later use in kexec_file. Signed-off-by: Martin Schwidefsky Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 5 +- arch/s390/boot/boot.h | 2 + arch/s390/boot/ipl_report.c | 165 ++++++++++++++++++ arch/s390/boot/startup.c | 18 +- arch/s390/include/asm/boot_data.h | 7 + arch/s390/kernel/ipl.c | 19 ++ arch/s390/kernel/setup.c | 45 +++++ security/integrity/Kconfig | 11 +- security/integrity/Makefile | 8 +- .../integrity/platform_certs/load_ipl_s390.c | 36 ++++ 10 files changed, 300 insertions(+), 16 deletions(-) create mode 100644 arch/s390/boot/ipl_report.c create mode 100644 security/integrity/platform_certs/load_ipl_s390.c diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index a5ae68b2aa844..1f8fd68beae38 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -28,8 +28,9 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o -obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o +obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o +obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o +obj-y += ctype.o obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 82bc06346e058..ca395fcff15ef 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -10,4 +10,6 @@ void parse_boot_command_line(void); void setup_memory_end(void); void print_missing_facilities(void); +unsigned long read_ipl_report(unsigned long safe_offset); + #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c new file mode 100644 index 0000000000000..0b4965573656f --- /dev/null +++ b/arch/s390/boot/ipl_report.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include "boot.h" + +int __bootdata_preserved(ipl_secure_flag); + +unsigned long __bootdata_preserved(ipl_cert_list_addr); +unsigned long __bootdata_preserved(ipl_cert_list_size); + +unsigned long __bootdata(early_ipl_comp_list_addr); +unsigned long __bootdata(early_ipl_comp_list_size); + +#define for_each_rb_entry(entry, rb) \ + for (entry = rb->entries; \ + (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ + entry++) + +static inline bool intersects(unsigned long addr0, unsigned long size0, + unsigned long addr1, unsigned long size1) +{ + return addr0 + size0 > addr1 && addr1 + size1 > addr0; +} + +static unsigned long find_bootdata_space(struct ipl_rb_components *comps, + struct ipl_rb_certificates *certs, + unsigned long safe_addr) +{ + struct ipl_rb_certificate_entry *cert; + struct ipl_rb_component_entry *comp; + size_t size; + + /* + * Find the length for the IPL report boot data + */ + early_ipl_comp_list_size = 0; + for_each_rb_entry(comp, comps) + early_ipl_comp_list_size += sizeof(*comp); + ipl_cert_list_size = 0; + for_each_rb_entry(cert, certs) + ipl_cert_list_size += sizeof(unsigned int) + 
cert->len; + size = ipl_cert_list_size + early_ipl_comp_list_size; + + /* + * Start from safe_addr to find a free memory area large + * enough for the IPL report boot data. This area is used + * for ipl_cert_list_addr/ipl_cert_list_size and + * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must + * not overlap with any component or any certificate. + */ +repeat: + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && + intersects(INITRD_START, INITRD_SIZE, safe_addr, size)) + safe_addr = INITRD_START + INITRD_SIZE; + for_each_rb_entry(comp, comps) + if (intersects(safe_addr, size, comp->addr, comp->len)) { + safe_addr = comp->addr + comp->len; + goto repeat; + } + for_each_rb_entry(cert, certs) + if (intersects(safe_addr, size, cert->addr, cert->len)) { + safe_addr = cert->addr + cert->len; + goto repeat; + } + early_ipl_comp_list_addr = safe_addr; + ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size; + + return safe_addr + size; +} + +static void copy_components_bootdata(struct ipl_rb_components *comps) +{ + struct ipl_rb_component_entry *comp, *ptr; + + ptr = (struct ipl_rb_component_entry *) early_ipl_comp_list_addr; + for_each_rb_entry(comp, comps) + memcpy(ptr++, comp, sizeof(*ptr)); +} + +static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) +{ + struct ipl_rb_certificate_entry *cert; + void *ptr; + + ptr = (void *) ipl_cert_list_addr; + for_each_rb_entry(cert, certs) { + *(unsigned int *) ptr = cert->len; + ptr += sizeof(unsigned int); + memcpy(ptr, (void *) cert->addr, cert->len); + ptr += cert->len; + } +} + +unsigned long read_ipl_report(unsigned long safe_addr) +{ + struct ipl_rb_certificates *certs; + struct ipl_rb_components *comps; + struct ipl_pl_hdr *pl_hdr; + struct ipl_rl_hdr *rl_hdr; + struct ipl_rb_hdr *rb_hdr; + unsigned long tmp; + void *rl_end; + + /* + * Check if there is an IPL report by looking at the copy + * of the IPL parameter information block. + */ + if (!ipl_block_valid || + !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) + return safe_addr; + ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); + /* + * There is an IPL report; to find it, load the pointer to the + * IPL parameter information block from lowcore and skip past + * the IPL parameter list, then align the address to a double + * word boundary. + */ + tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr; + pl_hdr = (struct ipl_pl_hdr *) tmp; + tmp = (tmp + pl_hdr->len + 7) & -8UL; + rl_hdr = (struct ipl_rl_hdr *) tmp; + /* Walk through the IPL report blocks in the IPL Report list */ + certs = NULL; + comps = NULL; + rl_end = (void *) rl_hdr + rl_hdr->len; + rb_hdr = (void *) rl_hdr + sizeof(*rl_hdr); + while ((void *) rb_hdr + sizeof(*rb_hdr) < rl_end && + (void *) rb_hdr + rb_hdr->len <= rl_end) { + + switch (rb_hdr->rbt) { + case IPL_RBT_CERTIFICATES: + certs = (struct ipl_rb_certificates *) rb_hdr; + break; + case IPL_RBT_COMPONENTS: + comps = (struct ipl_rb_components *) rb_hdr; + break; + default: + break; + } + + rb_hdr = (void *) rb_hdr + rb_hdr->len; + } + + /* + * With either the component list or the certificate list + * missing, the kernel will stay ignorant of secure IPL. + */ + if (!comps || !certs) + return safe_addr; + + /* + * Copy component and certificate list to a safe area + * where the decompressed kernel can find them. 
+ */ + safe_addr = find_bootdata_space(comps, certs, safe_addr); + copy_components_bootdata(comps); + copy_certificates_bootdata(certs); + + return safe_addr; +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 2bd4a62d436c1..90898976a9410 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -25,19 +25,16 @@ unsigned long mem_safe_offset(void) } #endif -static void rescue_initrd(void) +static void rescue_initrd(unsigned long addr) { - unsigned long min_initrd_addr; - if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) return; if (!INITRD_START || !INITRD_SIZE) return; - min_initrd_addr = mem_safe_offset(); - if (min_initrd_addr <= INITRD_START) + if (addr <= INITRD_START) return; - memmove((void *)min_initrd_addr, (void *)INITRD_START, INITRD_SIZE); - INITRD_START = min_initrd_addr; + memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE); + INITRD_START = addr; } static void copy_bootdata(void) @@ -52,12 +49,15 @@ static void copy_bootdata(void) void startup_kernel(void) { + unsigned long safe_addr; void *img; + store_ipl_parmblock(); + safe_addr = mem_safe_offset(); + safe_addr = read_ipl_report(safe_addr); uv_query_info(); - rescue_initrd(); + rescue_initrd(safe_addr); sclp_early_read_info(); - store_ipl_parmblock(); setup_boot_command_line(); parse_boot_command_line(); setup_memory_end(); diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index d794802a22917..f7eed27b3220f 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -7,5 +7,12 @@ extern char early_command_line[COMMAND_LINE_SIZE]; extern struct ipl_parameter_block ipl_block; extern int ipl_block_valid; +extern int ipl_secure_flag; + +extern unsigned long ipl_cert_list_addr; +extern unsigned long ipl_cert_list_size; + +extern unsigned long early_ipl_comp_list_addr; +extern unsigned long early_ipl_comp_list_size; #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f9718bc67cd4e..0567de4005b42 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -122,6 +122,13 @@ static char *dump_type_str(enum dump_type type) int __bootdata_preserved(ipl_block_valid); struct ipl_parameter_block __bootdata_preserved(ipl_block); +int __bootdata_preserved(ipl_secure_flag); + +unsigned long __bootdata_preserved(ipl_cert_list_addr); +unsigned long __bootdata_preserved(ipl_cert_list_size); + +unsigned long __bootdata(early_ipl_comp_list_addr); +unsigned long __bootdata(early_ipl_comp_list_size); static int reipl_capabilities = IPL_TYPE_UNKNOWN; @@ -267,6 +274,15 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); +static ssize_t ipl_secure_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%i\n", !!ipl_secure_flag); +} + +static struct kobj_attribute sys_ipl_secure_attr = + __ATTR(secure, 0444, ipl_secure_show, NULL); + static ssize_t ipl_vm_parm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -362,6 +378,7 @@ static struct attribute *ipl_fcp_attrs[] = { &sys_ipl_fcp_bootprog_attr.attr, &sys_ipl_fcp_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_secure_attr.attr, NULL, }; @@ -377,6 +394,7 @@ static struct attribute *ipl_ccw_attrs_vm[] = { &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_vm_parm_attr.attr, + &sys_ipl_secure_attr.attr, NULL, }; @@ -384,6 +402,7 @@ static struct attribute 
*ipl_ccw_attrs_lpar[] = { &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_secure_attr.attr, NULL, }; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 12d136e567c48..ffc87520aca96 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -50,6 +50,7 @@ #include #include +#include #include #include #include @@ -741,6 +742,15 @@ static void __init reserve_initrd(void) #endif } +/* + * Reserve the memory area used to pass the certificate lists + */ +static void __init reserve_certificate_list(void) +{ + if (ipl_cert_list_addr) + memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size); +} + static void __init reserve_mem_detect_info(void) { unsigned long start, size; @@ -1035,6 +1045,38 @@ static void __init setup_control_program_code(void) asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val)); } +/* + * Print the component list from the IPL report + */ +static void __init log_component_list(void) +{ + struct ipl_rb_component_entry *ptr, *end; + char *str; + + if (!early_ipl_comp_list_addr) + return; + if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR) + pr_info("Linux is running with Secure-IPL enabled\n"); + else + pr_info("Linux is running with Secure-IPL disabled\n"); + ptr = (void *) early_ipl_comp_list_addr; + end = (void *) ptr + early_ipl_comp_list_size; + pr_info("The IPL report contains the following components:\n"); + while (ptr < end) { + if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) { + if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED) + str = "signed, verified"; + else + str = "signed, verification failed"; + } else { + str = "not signed"; + } + pr_info("%016llx - %016llx (%s)\n", + ptr->addr, ptr->addr + ptr->len, str); + ptr++; + } +} + /* * Setup function called from init/main.c just after the banner * was printed. @@ -1055,6 +1097,8 @@ void __init setup_arch(char **cmdline_p) else pr_info("Linux is running as a guest in 64-bit mode\n"); + log_component_list(); + /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ *cmdline_p = boot_command_line; @@ -1086,6 +1130,7 @@ void __init setup_arch(char **cmdline_p) reserve_oldmem(); reserve_kernel(); reserve_initrd(); + reserve_certificate_list(); reserve_mem_detect_info(); memblock_allow_resize(); diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig index 2ea4ec9991d51..3ba1168b1756c 100644 --- a/security/integrity/Kconfig +++ b/security/integrity/Kconfig @@ -55,13 +55,22 @@ config INTEGRITY_PLATFORM_KEYRING bool "Provide keyring for platform/firmware trusted keys" depends on INTEGRITY_ASYMMETRIC_KEYS depends on SYSTEM_BLACKLIST_KEYRING - depends on EFI help Provide a separate, distinct keyring for platform trusted keys, which the kernel automatically populates during initialization from values provided by the platform for verifying the kexec'ed kernel image and, possibly, the initramfs signature. 
+config LOAD_UEFI_KEYS + depends on INTEGRITY_PLATFORM_KEYRING + depends on EFI + def_bool y + +config LOAD_IPL_KEYS + depends on INTEGRITY_PLATFORM_KEYRING + depends on S390 + def_bool y + config INTEGRITY_AUDIT bool "Enables integrity auditing support " depends on AUDIT diff --git a/security/integrity/Makefile b/security/integrity/Makefile index 86df9aba8c0fe..19faace696441 100644 --- a/security/integrity/Makefile +++ b/security/integrity/Makefile @@ -9,10 +9,10 @@ integrity-y := iint.o integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o -integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o \ - platform_certs/efi_parser.o \ - platform_certs/load_uefi.o -obj-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/load_uefi.o +integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o +integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \ + platform_certs/load_uefi.o +integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o $(obj)/load_uefi.o: KBUILD_CFLAGS += -fshort-wchar subdir-$(CONFIG_IMA) += ima diff --git a/security/integrity/platform_certs/load_ipl_s390.c b/security/integrity/platform_certs/load_ipl_s390.c new file mode 100644 index 0000000000000..e769dcb7ea94e --- /dev/null +++ b/security/integrity/platform_certs/load_ipl_s390.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../integrity.h" + +/* + * Load the certs contained in the IPL report created by the machine loader + * into the platform trusted keyring. + */ +static int __init load_ipl_certs(void) +{ + void *ptr, *end; + unsigned int len; + + if (!ipl_cert_list_addr) + return 0; + /* Copy the certificates to the system keyring */ + ptr = (void *) ipl_cert_list_addr; + end = ptr + ipl_cert_list_size; + while ((void *) ptr < end) { + len = *(unsigned int *) ptr; + ptr += sizeof(unsigned int); + add_to_platform_keyring("IPL:db", ptr, len); + ptr += len; + } + return 0; +} +late_initcall(load_ipl_certs); -- GitLab From 937347ac56bfca10c76153ac700e88a4b41f7130 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Feb 2019 17:23:39 +0100 Subject: [PATCH 1579/1861] s390/ipl: add helper functions to create an IPL report PR: Adjusted to the use in kexec_file later. 
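A rough usage sketch of these helpers (error handling trimmed; the kexec_buf kbuf and the certificate triple cert_key/cert_addr/cert_len are assumed to be prepared by the caller and are not taken from this patch):

	struct ipl_report *report;
	void *buf;

	report = ipl_report_init(&ipl_block);
	if (IS_ERR(report))
		return PTR_ERR(report);
	/* kbuf describes an already-loaded kexec segment */
	ipl_report_add_component(report, &kbuf, IPL_RB_COMPONENT_FLAG_SIGNED, 0);
	ipl_report_add_certificate(report, cert_key, cert_addr, cert_len);
	buf = ipl_report_finish(report);	/* serialized IPIB + IPL report list */
	/* ... place buf right after the IPL parameter block ... */
	ipl_report_free(report);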
Signed-off-by: Martin Schwidefsky Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ipl.h | 27 ++++++++ arch/s390/kernel/ipl.c | 134 ++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 878f6fd5f2e74..bf62af49af063 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -81,6 +81,33 @@ extern struct ipl_info ipl_info; extern void setup_ipl(void); extern void set_os_info_reipl_block(void); +struct ipl_report { + struct ipl_parameter_block *ipib; + struct list_head components; + struct list_head certificates; + size_t size; +}; + +struct ipl_report_component { + struct list_head list; + struct ipl_rb_component_entry entry; +}; + +struct ipl_report_certificate { + struct list_head list; + struct ipl_rb_certificate_entry entry; + void *key; +}; + +struct kexec_buf; +struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib); +void *ipl_report_finish(struct ipl_report *report); +int ipl_report_free(struct ipl_report *report); +int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, + unsigned char flags, unsigned short cert); +int ipl_report_add_certificate(struct ipl_report *report, void *key, + unsigned long addr, unsigned long len); + /* * DIAG 308 support */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 0567de4005b42..6f2bb64cf70ea 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1705,3 +1705,137 @@ void s390_reset_system(void) __ctl_clear_bit(0, 28); diag308_reset(); } + +#ifdef CONFIG_KEXEC_FILE + +int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, + unsigned char flags, unsigned short cert) +{ + struct ipl_report_component *comp; + + comp = vzalloc(sizeof(*comp)); + if (!comp) + return -ENOMEM; + list_add_tail(&comp->list, &report->components); + + comp->entry.addr = kbuf->mem; + comp->entry.len = kbuf->memsz; + comp->entry.flags = flags; + comp->entry.certificate_index = cert; + + report->size += sizeof(comp->entry); + + return 0; +} + +int ipl_report_add_certificate(struct ipl_report *report, void *key, + unsigned long addr, unsigned long len) +{ + struct ipl_report_certificate *cert; + + cert = vzalloc(sizeof(*cert)); + if (!cert) + return -ENOMEM; + list_add_tail(&cert->list, &report->certificates); + + cert->entry.addr = addr; + cert->entry.len = len; + cert->key = key; + + report->size += sizeof(cert->entry); + report->size += cert->entry.len; + + return 0; +} + +struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib) +{ + struct ipl_report *report; + + report = vzalloc(sizeof(*report)); + if (!report) + return ERR_PTR(-ENOMEM); + + report->ipib = ipib; + INIT_LIST_HEAD(&report->components); + INIT_LIST_HEAD(&report->certificates); + + report->size = ALIGN(ipib->hdr.len, 8); + report->size += sizeof(struct ipl_rl_hdr); + report->size += sizeof(struct ipl_rb_components); + report->size += sizeof(struct ipl_rb_certificates); + + return report; +} + +void *ipl_report_finish(struct ipl_report *report) +{ + struct ipl_report_certificate *cert; + struct ipl_report_component *comp; + struct ipl_rb_certificates *certs; + struct ipl_parameter_block *ipib; + struct ipl_rb_components *comps; + struct ipl_rl_hdr *rl_hdr; + void *buf, *ptr; + + buf = vzalloc(report->size); + if (!buf) + return ERR_PTR(-ENOMEM); + ptr = buf; + + memcpy(ptr, report->ipib, report->ipib->hdr.len); + ipib = ptr; + if (ipl_secure_flag) + 
ipib->hdr.flags |= IPL_PL_FLAG_SIPL; + ipib->hdr.flags |= IPL_PL_FLAG_IPLSR; + ptr += report->ipib->hdr.len; + ptr = PTR_ALIGN(ptr, 8); + + rl_hdr = ptr; + ptr += sizeof(*rl_hdr); + + comps = ptr; + comps->rbt = IPL_RBT_COMPONENTS; + ptr += sizeof(*comps); + list_for_each_entry(comp, &report->components, list) { + memcpy(ptr, &comp->entry, sizeof(comp->entry)); + ptr += sizeof(comp->entry); + } + comps->len = ptr - (void *)comps; + + certs = ptr; + certs->rbt = IPL_RBT_CERTIFICATES; + ptr += sizeof(*certs); + list_for_each_entry(cert, &report->certificates, list) { + memcpy(ptr, &cert->entry, sizeof(cert->entry)); + ptr += sizeof(cert->entry); + } + certs->len = ptr - (void *)certs; + rl_hdr->len = ptr - (void *)rl_hdr; + + list_for_each_entry(cert, &report->certificates, list) { + memcpy(ptr, cert->key, cert->entry.len); + ptr += cert->entry.len; + } + + BUG_ON(ptr > buf + report->size); + return buf; +} + +int ipl_report_free(struct ipl_report *report) +{ + struct ipl_report_component *comp, *ncomp; + struct ipl_report_certificate *cert, *ncert; + + list_for_each_entry_safe(comp, ncomp, &report->components, list) + vfree(comp); + + list_for_each_entry_safe(cert, ncert, &report->certificates, list) + vfree(cert); + + vfree(report); + + return 0; +} + +#endif -- GitLab From f6780686525cc69a16a893cac0dd6adfbf25b7ff Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 17 Apr 2019 16:32:27 +0200 Subject: [PATCH 1580/1861] s390/boot: pad bzImage to 4K In order to be able to sign the bzImage independent of the block size of the IPL device, align the bzImage to 4096 bytes. Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 1f8fd68beae38..c1993c57300fb 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -50,6 +50,7 @@ define cmd_section_cmp touch $@ endef +OBJCOPYFLAGS_bzImage := --pad-to $$(readelf -s $(obj)/compressed/vmlinux | awk '/\<_end\>/ {print or(strtonum("0x"$$2),4095)+1}') $(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE $(call if_changed,objcopy) -- GitLab From f3df44e7c9869b7691a4a0b57fa39ca47060b424 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Thu, 14 Mar 2019 13:50:32 +0100 Subject: [PATCH 1581/1861] s390/zcore: Rename ipl_block to mitigate name collision With git commit 1e941d39493f1820475d80729a03cd7ab8c3c86d "s390: move ipl block to .boot.preserved.data section" the early_ipl_block got renamed to ipl_block and became publicly available via boot_data.h. This might cause problems with zcore, which has its own ipl_block variable. Thus rename the ipl_block in zcore to prevent name collision and highlight that it's only used locally. 
Signed-off-by: Philipp Rudo Fixes: 1e941d39493f ("s390: move ipl block to .boot.preserved.data section") Signed-off-by: Martin Schwidefsky --- drivers/s390/char/zcore.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 76d3c50bf078b..f75d3bfb5af35 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -51,7 +51,7 @@ static struct dentry *zcore_dir; static struct dentry *zcore_memmap_file; static struct dentry *zcore_reipl_file; static struct dentry *zcore_hsa_file; -static struct ipl_parameter_block *ipl_block; +static struct ipl_parameter_block *zcore_ipl_block; static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE); @@ -182,8 +182,8 @@ static const struct file_operations zcore_memmap_fops = { static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - if (ipl_block) { - diag308(DIAG308_SET, ipl_block); + if (zcore_ipl_block) { + diag308(DIAG308_SET, zcore_ipl_block); diag308(DIAG308_LOAD_CLEAR, NULL); } return count; @@ -265,18 +265,20 @@ static int __init zcore_reipl_init(void) return rc; if (ipib_info.ipib == 0) return 0; - ipl_block = (void *) __get_free_page(GFP_KERNEL); - if (!ipl_block) + zcore_ipl_block = (void *) __get_free_page(GFP_KERNEL); + if (!zcore_ipl_block) return -ENOMEM; if (ipib_info.ipib < sclp.hsa_size) - rc = memcpy_hsa_kernel(ipl_block, ipib_info.ipib, PAGE_SIZE); + rc = memcpy_hsa_kernel(zcore_ipl_block, ipib_info.ipib, + PAGE_SIZE); else - rc = memcpy_real(ipl_block, (void *) ipib_info.ipib, PAGE_SIZE); - if (rc || (__force u32)csum_partial(ipl_block, ipl_block->hdr.len, 0) != + rc = memcpy_real(zcore_ipl_block, (void *) ipib_info.ipib, + PAGE_SIZE); + if (rc || (__force u32)csum_partial(zcore_ipl_block, zcore_ipl_block->hdr.len, 0) != ipib_info.checksum) { TRACE("Checksum does not match\n"); - free_page((unsigned long) ipl_block); - ipl_block = NULL; + free_page((unsigned long) zcore_ipl_block); + zcore_ipl_block = NULL; } return 0; } -- GitLab From 5bb5c3a3ac102158b799bf5eda871223aa5e9c25 Mon Sep 17 00:00:00 2001 From: Shun-Chih Yu Date: Thu, 25 Apr 2019 11:53:50 +0800 Subject: [PATCH 1582/1861] dmaengine: mediatek-cqdma: fix wrong register usage in mtk_cqdma_start This patch fixes wrong register usage in the mtk_cqdma_start. The destination register should be MTK_CQDMA_DST2 instead. 
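For readers unfamiliar with the block, a hedged sketch of the register pairing behind the one-line fix that follows: each DMA address is split across a low-word register and a high-word companion, and the stale high destination word is exactly what the bug left behind. MTK_CQDMA_SRC/MTK_CQDMA_DST as the low-word register names are assumptions based on the driver's naming scheme, not taken from this diff:

static void set_addrs_sketch(struct mtk_cqdma_pchan *pc,
                             dma_addr_t src, dma_addr_t dst)
{
        mtk_dma_set(pc, MTK_CQDMA_SRC, src);    /* low words (assumed names) */
        mtk_dma_set(pc, MTK_CQDMA_DST, dst);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
        mtk_dma_set(pc, MTK_CQDMA_SRC2, src >> MTK_CQDMA_ADDR2_SHFIT);
        mtk_dma_set(pc, MTK_CQDMA_DST2, dst >> MTK_CQDMA_ADDR2_SHFIT);
#else
        mtk_dma_set(pc, MTK_CQDMA_SRC2, 0);
        mtk_dma_set(pc, MTK_CQDMA_DST2, 0);     /* the buggy code wrote SRC2 here a second time */
#endif
}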
Fixes: b1f01e48df5a ("dmaengine: mediatek: Add MediaTek Command-Queue DMA controller for MT6765 SoC") Signed-off-by: Shun-Chih Yu Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-cqdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index 131f3974740d5..814853842e29f 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -253,7 +253,7 @@ static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc, #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT); #else - mtk_dma_set(pc, MTK_CQDMA_SRC2, 0); + mtk_dma_set(pc, MTK_CQDMA_DST2, 0); #endif /* setup the length */ -- GitLab From 84ff7a09c371bc7417eabfda19bf7f113ec917b6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 12:45:09 +0100 Subject: [PATCH 1583/1861] arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value Rather embarrassingly, our futex() FUTEX_WAKE_OP implementation doesn't explicitly set the return value on the non-faulting path and instead leaves it holding the result of the underlying atomic operation. This means that any FUTEX_WAKE_OP atomic operation which computes a non-zero value will be reported as having failed. Regrettably, I wrote the buggy code back in 2011 and it was upstreamed as part of the initial arm64 support in 2012. The reasons we appear to get away with this are: 1. FUTEX_WAKE_OP is rarely used and therefore doesn't appear to get exercised by futex() test applications 2. If the result of the atomic operation is zero, the system call behaves correctly 3. Prior to version 2.25, the only operation used by GLIBC set the futex to zero, and therefore worked as expected. From 2.25 onwards, FUTEX_WAKE_OP is not used by GLIBC at all. Fix the implementation by ensuring that the return value is either 0 to indicate that the atomic operation completed successfully, or -EFAULT if we encountered a fault when accessing the user mapping. 
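For context, a minimal userspace sketch of a call that trips the bug described above: FUTEX_OP_ADD computes old + 1, so whenever that result is non-zero the unfixed kernel misreported the whole operation as failed. Error handling is elided:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static long wake_op(int *uaddr, int *uaddr2)
{
        /* wake one waiter on uaddr; atomically *uaddr2 += 1, then wake one
         * waiter on uaddr2 if the old value was greater than zero */
        return syscall(SYS_futex, uaddr, FUTEX_WAKE_OP, 1, 1, uaddr2,
                       FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0));
}

On success the syscall returns the total number of waiters woken, which is why the atomic helper itself must hand back 0 or -EFAULT rather than its scratch value.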
Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index cccb83ad7fa8e..e1d95f08f8e12 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,8 +30,8 @@ do { \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ -"2: stlxr %w3, %w0, %2\n" \ -" cbnz %w3, 1b\n" \ +"2: stlxr %w0, %w3, %2\n" \ +" cbnz %w0, 1b\n" \ " dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ @@ -50,30 +50,30 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval = 0, ret, tmp; + int oldval, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w0, %w4", + __futex_atomic_op("mov %w3, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w0, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w0, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w0, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w4", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w0, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; default: -- GitLab From 6b4f4bc9cb22875f97023984a625386f0c7cc1c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2019 11:58:08 +0000 Subject: [PATCH 1584/1861] locking/futex: Allow low-level atomic operations to return -EAGAIN Some futex() operations, including FUTEX_WAKE_OP, require the kernel to perform an atomic read-modify-write of the futex word via the userspace mapping. These operations are implemented by each architecture in arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which are called in atomic context with the relevant hash bucket locks held. Although these routines may return -EFAULT in response to a page fault generated when accessing userspace, they are expected to succeed (i.e. return 0) in all other cases. This poses a problem for architectures that do not provide bounded forward progress guarantees or fairness of contended atomic operations and can lead to starvation in some cases. In these problematic scenarios, we must return back to the core futex code so that we can drop the hash bucket locks and reschedule if necessary, much like we do in the case of a page fault. Allow architectures to return -EAGAIN from their implementations of arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which will cause the core futex code to reschedule if necessary and return back to the architecture code later on. 
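A hedged sketch of the contract this creates for architecture code; ll_sc_try_once() is a hypothetical helper standing in for a single load-exclusive/store-exclusive attempt (returning -EBUSY when the store-exclusive fails), and the loop bound is arbitrary:

#include <linux/errno.h>

static int arch_futex_op_sketch(u32 __user *uaddr, u32 newval, u32 *oldval)
{
        unsigned int loops = 128;       /* arbitrary bound for the example */
        int ret;

        while (loops--) {
                ret = ll_sc_try_once(uaddr, newval, oldval);    /* hypothetical */
                if (ret != -EBUSY)
                        return ret;     /* 0 on success, -EFAULT on fault */
        }

        /* no forward progress under contention: let the core futex code
         * drop the hash bucket locks, cond_resched() and retry */
        return -EAGAIN;
}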
Cc: Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon --- kernel/futex.c | 188 ++++++++++++++++++++++++++++++------------------- 1 file changed, 117 insertions(+), 71 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 9e40cf7be6066..6262f1534ac94 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1311,13 +1311,15 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { + int err; u32 uninitialized_var(curval); if (unlikely(should_fail_futex(true))) return -EFAULT; - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) - return -EFAULT; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (unlikely(err)) + return err; /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; @@ -1502,10 +1504,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { - ret = -EFAULT; - - } else if (curval != uval) { + ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (!ret && (curval != uval)) { /* * If a unconditional UNLOCK_PI operation (user space did not * try the TID->0 transition) raced with a waiter setting the @@ -1700,32 +1700,32 @@ retry_private: double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { - double_unlock_hb(hb1, hb2); -#ifndef CONFIG_MMU - /* - * we don't get EFAULT from MMU faults if we don't have an MMU, - * but we might get them from range checking - */ - ret = op_ret; - goto out_put_keys; -#endif - - if (unlikely(op_ret != -EFAULT)) { + if (!IS_ENABLED(CONFIG_MMU) || + unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { + /* + * we don't get EFAULT from MMU faults if we don't have + * an MMU, but we might get them from range checking + */ ret = op_ret; goto out_put_keys; } - ret = fault_in_user_writeable(uaddr2); - if (ret) - goto out_put_keys; + if (op_ret == -EFAULT) { + ret = fault_in_user_writeable(uaddr2); + if (ret) + goto out_put_keys; + } - if (!(flags & FLAGS_SHARED)) + if (!(flags & FLAGS_SHARED)) { + cond_resched(); goto retry_private; + } put_futex_key(&key2); put_futex_key(&key1); + cond_resched(); goto retry; } @@ -2350,7 +2350,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, u32 uval, uninitialized_var(curval), newval; struct task_struct *oldowner, *newowner; u32 newtid; - int ret; + int ret, err = 0; lockdep_assert_held(q->lock_ptr); @@ -2421,14 +2421,17 @@ retry: if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; - if (get_futex_value_locked(&uval, uaddr)) - goto handle_fault; + err = get_futex_value_locked(&uval, uaddr); + if (err) + goto handle_err; for (;;) { newval = (uval & FUTEX_OWNER_DIED) | newtid; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - goto handle_fault; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (err) + goto handle_err; + if (curval == uval) break; uval = curval; @@ -2456,23 +2459,37 @@ retry: return 0; /* - * To handle the page fault we need to drop the locks here. That gives - * the other task (either the highest priority waiter itself or the - * task which stole the rtmutex) the chance to try the fixup of the - * pi_state. So once we are back from handling the fault we need to - * check the pi_state after reacquiring the locks and before trying to - * do another fixup. 
When the fixup has been done already we simply - * return. + * In order to reschedule or handle a page fault, we need to drop the + * locks here. In the case of a fault, this gives the other task + * (either the highest priority waiter itself or the task which stole + * the rtmutex) the chance to try the fixup of the pi_state. So once we + * are back from handling the fault we need to check the pi_state after + * reacquiring the locks and before trying to do another fixup. When + * the fixup has been done already we simply return. * * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely * drop hb->lock since the caller owns the hb -> futex_q relation. * Dropping the pi_mutex->wait_lock requires the state revalidate. */ -handle_fault: +handle_err: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr); - ret = fault_in_user_writeable(uaddr); + switch (err) { + case -EFAULT: + ret = fault_in_user_writeable(uaddr); + break; + + case -EAGAIN: + cond_resched(); + ret = 0; + break; + + default: + WARN_ON_ONCE(1); + ret = err; + break; + } spin_lock(q->lock_ptr); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); @@ -3041,10 +3058,8 @@ retry: * A unconditional UNLOCK_PI op raced against a waiter * setting the FUTEX_WAITERS bit. Try again. */ - if (ret == -EAGAIN) { - put_futex_key(&key); - goto retry; - } + if (ret == -EAGAIN) + goto pi_retry; /* * wake_futex_pi has detected invalid state. Tell user * space. @@ -3059,9 +3074,19 @@ retry: * preserve the WAITERS bit not the OWNER_DIED one. We are the * owner. */ - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { + if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) { spin_unlock(&hb->lock); - goto pi_faulted; + switch (ret) { + case -EFAULT: + goto pi_faulted; + + case -EAGAIN: + goto pi_retry; + + default: + WARN_ON_ONCE(1); + goto out_putkey; + } } /* @@ -3075,6 +3100,11 @@ out_putkey: put_futex_key(&key); return ret; +pi_retry: + put_futex_key(&key); + cond_resched(); + goto retry; + pi_faulted: put_futex_key(&key); @@ -3435,6 +3465,7 @@ err_unlock: static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + int err; /* Futex address must be 32bit aligned */ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) @@ -3444,42 +3475,57 @@ retry: if (get_user(uval, uaddr)) return -1; - if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) { - /* - * Ok, this dying thread is truly holding a futex - * of interest. Set the OWNER_DIED bit atomically - * via cmpxchg, and if the value had FUTEX_WAITERS - * set, wake up a waiter (if any). (We have to do a - * futex_wake() even if OWNER_DIED is already set - - * to handle the rare but possible case of recursive - * thread-death.) The rest of the cleanup is done in - * userspace. - */ - mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - /* - * We are not holding a lock here, but we want to have - * the pagefault_disable/enable() protection because - * we want to handle the fault gracefully. If the - * access fails we try to fault in the futex with R/W - * verification via get_user_pages. get_user() above - * does not guarantee R/W access. If that fails we - * give up and leave the futex locked. - */ - if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + + /* + * Ok, this dying thread is truly holding a futex + * of interest. 
Set the OWNER_DIED bit atomically + * via cmpxchg, and if the value had FUTEX_WAITERS + * set, wake up a waiter (if any). (We have to do a + * futex_wake() even if OWNER_DIED is already set - + * to handle the rare but possible case of recursive + * thread-death.) The rest of the cleanup is done in + * userspace. + */ + mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; + + /* + * We are not holding a lock here, but we want to have + * the pagefault_disable/enable() protection because + * we want to handle the fault gracefully. If the + * access fails we try to fault in the futex with R/W + * verification via get_user_pages. get_user() above + * does not guarantee R/W access. If that fails we + * give up and leave the futex locked. + */ + if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { + switch (err) { + case -EFAULT: if (fault_in_user_writeable(uaddr)) return -1; goto retry; - } - if (nval != uval) + + case -EAGAIN: + cond_resched(); goto retry; - /* - * Wake robust non-PI futexes here. The wakeup of - * PI futexes happens in exit_pi_state(): - */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + default: + WARN_ON_ONCE(1); + return err; + } } + + if (nval != uval) + goto retry; + + /* + * Wake robust non-PI futexes here. The wakeup of + * PI futexes happens in exit_pi_state(): + */ + if (!pi && (uval & FUTEX_WAITERS)) + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; } -- GitLab From 03110a5cb2161690ae5ac04994d47ed0cd6cef75 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 14:23:17 +0100 Subject: [PATCH 1585/1861] arm64: futex: Bound number of LDXR/STXR loops in FUTEX_WAKE_OP Our futex implementation makes use of LDXR/STXR loops to perform atomic updates to user memory from atomic context. This can lead to latency problems if we end up spinning around the LL/SC sequence at the expense of doing something useful. Rework our futex atomic operations so that we return -EAGAIN if we fail to update the futex word after 128 attempts. The core futex code will reschedule if necessary and we'll try again later. Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 55 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index e1d95f08f8e12..2d78ea6932b7b 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -23,26 +23,34 @@ #include +#define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think of? 
*/ + #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ do { \ + unsigned int loops = FUTEX_MAX_LOOPS; \ + \ uaccess_enable(); \ asm volatile( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w0, %w3, %2\n" \ -" cbnz %w0, 1b\n" \ -" dmb ish\n" \ +" cbz %w0, 3f\n" \ +" sub %w4, %w4, %w0\n" \ +" cbnz %w4, 1b\n" \ +" mov %w0, %w7\n" \ "3:\n" \ +" dmb ish\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ -"4: mov %w0, %w5\n" \ +"4: mov %w0, %w6\n" \ " b 3b\n" \ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ - : "r" (oparg), "Ir" (-EFAULT) \ + : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ + "+r" (loops) \ + : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable(); \ } while (0) @@ -57,23 +65,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w3, %w4", + __futex_atomic_op("mov %w3, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w3, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w3, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w3, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w5", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w3, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; default: @@ -93,6 +101,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 oldval, u32 newval) { int ret = 0; + unsigned int loops = FUTEX_MAX_LOOPS; u32 val, tmp; u32 __user *uaddr; @@ -104,20 +113,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" -" sub %w3, %w1, %w4\n" -" cbnz %w3, 3f\n" -"2: stlxr %w3, %w5, %2\n" -" cbnz %w3, 1b\n" -" dmb ish\n" +" sub %w3, %w1, %w5\n" +" cbnz %w3, 4f\n" +"2: stlxr %w3, %w6, %2\n" +" cbz %w3, 3f\n" +" sub %w4, %w4, %w3\n" +" cbnz %w4, 1b\n" +" mov %w0, %w8\n" "3:\n" +" dmb ish\n" +"4:\n" " .pushsection .fixup,\"ax\"\n" -"4: mov %w0, %w6\n" -" b 3b\n" +"5: mov %w0, %w7\n" +" b 4b\n" " .popsection\n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) - : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT) + _ASM_EXTABLE(1b, 5b) + _ASM_EXTABLE(2b, 5b) + : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) + : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) : "memory"); uaccess_disable(); -- GitLab From 8e4e0ac02b449297b86498ac24db5786ddd9f647 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 10 Apr 2019 11:49:11 +0100 Subject: [PATCH 1586/1861] arm64: futex: Avoid copying out uninitialised stack in failed cmpxchg() Returning an error code from futex_atomic_cmpxchg_inatomic() indicates that the caller should not make any use of *uval, and should instead act upon on the value of the error code. Although this is implemented correctly in our futex code, we needlessly copy uninitialised stack to *uval in the error case, which can easily be avoided. 
Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 2d78ea6932b7b..bdb3c05070a2b 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -134,7 +134,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, : "memory"); uaccess_disable(); - *uval = val; + if (!ret) + *uval = val; + return ret; } -- GitLab From 427503519739e779c0db8afe876c1b33f3ac60ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 10 Apr 2019 11:51:54 +0100 Subject: [PATCH 1587/1861] futex: Update comments and docs about return values of arch futex code The architecture implementations of 'arch_futex_atomic_op_inuser()' and 'futex_atomic_cmpxchg_inatomic()' are permitted to return only -EFAULT, -EAGAIN or -ENOSYS in the case of failure. Update the comments in the asm-generic/ implementation and also a stray reference in the robust futex documentation. Signed-off-by: Will Deacon --- Documentation/robust-futexes.txt | 3 +-- include/asm-generic/futex.h | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt index 6c42c75103eb6..6361fb01c9c1e 100644 --- a/Documentation/robust-futexes.txt +++ b/Documentation/robust-futexes.txt @@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have the new syscalls yet. Architectures need to implement the new futex_atomic_cmpxchg_inatomic() -inline function before writing up the syscalls (that function returns --ENOSYS right now). +inline function before writing up the syscalls. diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index fcb61b4659b39..8666fe7f35d77 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -23,7 +23,9 @@ * * Return: * 0 - On success - * <0 - On error + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Operation not supported */ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) @@ -85,7 +87,9 @@ out_pagefault_enable: * * Return: * 0 - On success - * <0 - On error + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) */ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, -- GitLab From c1c477217882c610a2ba0268f5faf36c9c092528 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Apr 2019 09:43:26 -0700 Subject: [PATCH 1588/1861] l2tp: use rcu_dereference_sk_user_data() in l2tp_udp_encap_recv() Canonical way to fetch sk_user_data from an encap_rcv() handler called from UDP stack in rcu protected section is to use rcu_dereference_sk_user_data(), otherwise compiler might read it multiple times. Fixes: d00fa9adc528 ("l2tp: fix races with tunnel socket close") Signed-off-by: Eric Dumazet Cc: James Chapman Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fed6becc5daf8..aee33d1320184 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_tunnel(sk); + tunnel = rcu_dereference_sk_user_data(sk); if (tunnel == NULL) goto pass_up; -- GitLab From 56c5bc1849de1311eda5bc506bddad504bfd14fc Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 24 Apr 2019 11:35:49 +0200 Subject: [PATCH 1589/1861] net: ethernet: stmmac: manage the get_irq probe defer case Manage the -EPROBE_DEFER error case for "stm32_pwr_wakeup" IRQ. Signed-off-by: Fabien Dessenne Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 062a600fa5a76..21428537e2314 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -333,6 +333,9 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac, */ dwmac->irq_pwr_wakeup = platform_get_irq_byname(pdev, "stm32_pwr_wakeup"); + if (dwmac->irq_pwr_wakeup == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (!dwmac->clk_eth_ck && dwmac->irq_pwr_wakeup >= 0) { err = device_init_wakeup(&pdev->dev, true); if (err) { -- GitLab From e5ce5e7267ddcbe13ab9ead2542524e1b7993e5a Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:20 -0500 Subject: [PATCH 1590/1861] arm64: Provide a command line to disable spectre_v2 mitigation There are various reasons, such as benchmarking, to disable spectrev2 mitigation on a machine. Provide a command-line option to do so. Signed-off-by: Jeremy Linton Reviewed-by: Suzuki K Poulose Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++---- arch/arm64/kernel/cpu_errata.c | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cf82bac648cd9..6a929258faf71 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2905,10 +2905,10 @@ check bypass). With this option data leaks are possible in the system. - nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 - (indirect branch prediction) vulnerability. System may - allow data leaks with this option, which is equivalent - to spectre_v2=off. + nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for + the Spectre variant 2 (indirect branch prediction) + vulnerability. System may allow data leaks with this + option. 
nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9950bb0cbd521..d2b2c69d31bb6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -220,6 +220,14 @@ static void qcom_link_stack_sanitization(void) : "=&r" (tmp)); } +static bool __nospectre_v2; +static int __init parse_nospectre_v2(char *str) +{ + __nospectre_v2 = true; + return 0; +} +early_param("nospectre_v2", parse_nospectre_v2); + static void enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { @@ -231,6 +239,11 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) if (!entry->matches(entry, SCOPE_LOCAL_CPU)) return; + if (__nospectre_v2) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + return; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) return; -- GitLab From 3891ebccace188af075ce143d8b072b65e90f695 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Mon, 15 Apr 2019 16:21:21 -0500 Subject: [PATCH 1591/1861] arm64: Add sysfs vulnerability show for spectre-v1 spectre-v1 has been mitigated and the mitigation is always active. Report this to userspace via sysfs Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Acked-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d2b2c69d31bb6..cf623657cf3c6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -755,3 +755,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} -- GitLab From 1b3ccf4be0e7be8c4bd8522066b6cbc92591e912 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:22 -0500 Subject: [PATCH 1592/1861] arm64: add sysfs vulnerability show for meltdown We implement page table isolation as a mitigation for meltdown. Report this to userspace via sysfs. 
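Once this is in place, the mitigation state is an ordinary sysfs file; a minimal userspace reader, with error handling kept short:

#include <stdio.h>

int main(void)
{
        char line[64];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/meltdown", "r");

        if (f && fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* "Mitigation: PTI", "Not affected" or "Vulnerable" */
        if (f)
                fclose(f);
        return 0;
}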
Signed-off-by: Jeremy Linton Reviewed-by: Suzuki K Poulose Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 58 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4061de10cea6c..703ee8564fbda 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -947,7 +947,7 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) return has_cpuid_feature(entry, scope); } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, @@ -967,6 +967,16 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, { /* sentinel */ } }; char const *str = "command line option"; + bool meltdown_safe; + + meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); + + /* Defer to CPU feature registers */ + if (has_cpuid_feature(entry, scope)) + meltdown_safe = true; + + if (!meltdown_safe) + __meltdown_safe = false; /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -978,6 +988,19 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, __kpti_forced = -1; } + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) { + if (!__kpti_forced) { + str = "KASLR"; + __kpti_forced = 1; + } + } + + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + pr_info_once("kernel page table isolation disabled by kernel configuration\n"); + return false; + } + /* Forced? 
*/ if (__kpti_forced) { pr_info_once("kernel page table isolation forced %s by %s\n", @@ -985,18 +1008,10 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return __kpti_forced > 0; } - /* Useful for KASLR robustness */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return kaslr_offset() > 0; - - /* Don't force KPTI for CPUs that are not vulnerable */ - if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) - return false; - - /* Defer to CPU feature registers */ - return !has_cpuid_feature(entry, scope); + return !meltdown_safe; } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static void kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { @@ -1026,6 +1041,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) return; } +#else +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) +{ +} +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ static int __init parse_kpti(char *str) { @@ -1039,7 +1060,6 @@ static int __init parse_kpti(char *str) return 0; } early_param("kpti", parse_kpti); -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ #ifdef CONFIG_ARM64_HW_AFDBM static inline void __cpu_enable_hw_dbm(void) @@ -1306,7 +1326,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, @@ -1322,7 +1341,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, -#endif { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, @@ -2101,3 +2119,15 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__meltdown_safe) + return sprintf(buf, "Not affected\n"); + + if (arm64_kernel_unmapped_at_el0()) + return sprintf(buf, "Mitigation: PTI\n"); + + return sprintf(buf, "Vulnerable\n"); +} -- GitLab From 73f38166095947f3b86b02fbed6bd592223a7ac8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 16:21:23 -0500 Subject: [PATCH 1593/1861] arm64: Advertise mitigation of Spectre-v2, or lack thereof We currently have a list of CPUs affected by Spectre-v2, for which we check that the firmware implements ARCH_WORKAROUND_1. It turns out that not all firmwares do implement the required mitigation, and that we fail to let the user know about it. Instead, let's slightly revamp our checks, and rely on a whitelist of cores that are known to be non-vulnerable, and let the user know the status of the mitigation in the kernel log. 
Signed-off-by: Marc Zyngier Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 109 +++++++++++++++++---------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cf623657cf3c6..032f1a4dbea2d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -131,9 +131,9 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) +static void install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) { static DEFINE_RAW_SPINLOCK(bp_lock); int cpu, slot = -1; @@ -169,7 +169,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, +static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { @@ -177,23 +177,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, - bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - u64 pfr0; - - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - pfr0 = read_cpuid(ID_AA64PFR0_EL1); - if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) - return; - - __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); -} - #include #include #include @@ -228,31 +211,27 @@ static int __init parse_nospectre_v2(char *str) } early_param("nospectre_v2", parse_nospectre_v2); -static void -enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) +/* + * -1: No workaround + * 0: No workaround required + * 1: Workaround installed + */ +static int detect_harden_bp_fw(void) { bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - if (__nospectre_v2) { - pr_info_once("spectrev2 mitigation disabled by command line option\n"); - return; - } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return; + return -1; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return; + return -1; cb = call_hvc_arch_workaround_1; /* This is a guest, no need to patch KVM vectors */ smccc_start = NULL; @@ -263,23 +242,23 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return; + return -1; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return; + return -1; } if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) cb = qcom_link_stack_sanitization; - install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); + install_bp_hardening_cb(cb, smccc_start, smccc_end); - return; + return 1; } #endif /* 
CONFIG_HARDEN_BRANCH_PREDICTOR */ @@ -521,24 +500,48 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) CAP_MIDR_RANGE_LIST(midr_list) #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - /* - * List of CPUs where we need to issue a psci call to - * harden the branch predictor. + * List of CPUs that do not need any Spectre-v2 mitigation at all. */ -static const struct midr_range arm64_bp_harden_smccc_cpus[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), - {}, +static const struct midr_range spectre_v2_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + { /* sentinel */ } }; +static bool __maybe_unused +check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) +{ + int need_wa; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + /* If the CPU has CSV2 set, we're safe */ + if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), + ID_AA64PFR0_CSV2_SHIFT)) + return false; + + /* Alternatively, we have a list of unaffected CPUs */ + if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) + return false; + + /* Fallback to firmware detection */ + need_wa = detect_harden_bp_fw(); + if (!need_wa) + return false; + + /* forced off */ + if (__nospectre_v2) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + return false; + } + + if (need_wa < 0) + pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + + return (need_wa > 0); +} #endif #ifdef CONFIG_HARDEN_EL2_VECTORS @@ -717,8 +720,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - .cpu_enable = enable_smccc_arch_workaround_1, - ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = check_branch_predictor, }, #endif #ifdef CONFIG_HARDEN_EL2_VECTORS -- GitLab From 517953c2c47f9c00a002f588ac856a5bc70cede3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 16:21:24 -0500 Subject: [PATCH 1594/1861] arm64: Use firmware to detect CPUs that are not affected by Spectre-v2 The SMCCC ARCH_WORKAROUND_1 service can indicate that although the firmware knows about the Spectre-v2 mitigation, this particular CPU is not vulnerable, and it is thus not necessary to call the firmware on this CPU. Let's use this information to our benefit. 
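A condensed sketch of the discovery convention the diff below implements for the SMC conduit; the three return values mirror the outcomes described above, and the HVC conduit is handled the same way:

#include <linux/arm-smccc.h>

static int probe_wa1_sketch(void)
{
        struct arm_smccc_res res;

        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                          ARM_SMCCC_ARCH_WORKAROUND_1, &res);
        switch ((int)res.a0) {
        case 1:                 /* firmware says this CPU is not affected */
                return 0;
        case 0:                 /* workaround available: install the callback */
                return 1;
        default:                /* negative: no firmware mitigation */
                return -1;
        }
}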
Signed-off-by: Marc Zyngier Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 032f1a4dbea2d..60cf87c4deb73 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -230,22 +230,36 @@ static int detect_harden_bp_fw(void) case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; + break; + default: return -1; - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; + } break; case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + default: return -1; - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; + } break; default: -- GitLab From 8c1e3d2bb44cbb998cb28ff9a18f105fee7f1eb3 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:25 -0500 Subject: [PATCH 1595/1861] arm64: Always enable spectre-v2 vulnerability detection Ensure we are always able to detect whether or not the CPU is affected by Spectre-v2, so that we can later advertise this to userspace. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 60cf87c4deb73..a9c3ad4f79486 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -109,7 +109,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include #include @@ -270,11 +269,11 @@ static int detect_harden_bp_fw(void) ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) cb = qcom_link_stack_sanitization; - install_bp_hardening_cb(cb, smccc_start, smccc_end); + if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) + install_bp_hardening_cb(cb, smccc_start, smccc_end); return 1; } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); @@ -513,7 +512,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR /* * List of CPUs that do not need any Spectre-v2 mitigation at all. 
*/ @@ -545,6 +543,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) if (!need_wa) return false; + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { + pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); + __hardenbp_enab = false; + return false; + } + /* forced off */ if (__nospectre_v2) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); @@ -556,7 +560,6 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } -#endif #ifdef CONFIG_HARDEN_EL2_VECTORS @@ -731,13 +734,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, }, -#endif #ifdef CONFIG_HARDEN_EL2_VECTORS { .desc = "EL2 vector hardening", -- GitLab From 88ef66a28391ea7b624bfb7508a5b015c13b28f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 24 Apr 2019 19:12:46 +0200 Subject: [PATCH 1596/1861] qmi_wwan: new Wistron, ZTE and D-Link devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding device entries found in vendor modified versions of this driver. Function maps for some of the devices follow: WNC D16Q1, D16Q5, D18Q1 LTE CAT3 module (1435:0918) MI_00 Qualcomm HS-USB Diagnostics MI_01 Android Debug interface MI_02 Qualcomm HS-USB Modem MI_03 Qualcomm Wireless HS-USB Ethernet Adapter MI_04 Qualcomm Wireless HS-USB Ethernet Adapter MI_05 Qualcomm Wireless HS-USB Ethernet Adapter MI_06 USB Mass Storage Device T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1435 ProdID=0918 Rev= 2.32 S: Manufacturer=Android S: Product=Android S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=8a(I) Atr=03(Int.) 
MxPS= 64 Ivl=32ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms WNC D18 LTE CAT3 module (1435:d182) MI_00 Qualcomm HS-USB Diagnostics MI_01 Androd Debug interface MI_02 Qualcomm HS-USB Modem MI_03 Qualcomm HS-USB NMEA MI_04 Qualcomm Wireless HS-USB Ethernet Adapter MI_05 Qualcomm Wireless HS-USB Ethernet Adapter MI_06 USB Mass Storage Device ZM8510/ZM8620/ME3960 (19d2:0396) MI_00 ZTE Mobile Broadband Diagnostics Port MI_01 ZTE Mobile Broadband AT Port MI_02 ZTE Mobile Broadband Modem MI_03 ZTE Mobile Broadband NDIS Port (qmi_wwan) MI_04 ZTE Mobile Broadband ADB Port ME3620_X (19d2:1432) MI_00 ZTE Diagnostics Device MI_01 ZTE UI AT Interface MI_02 ZTE Modem Device MI_03 ZTE Mobile Broadband Network Adapter MI_04 ZTE Composite ADB Interface Reported-by: Lars Melin Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9195f3476b1d7..679e404a5224f 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1122,9 +1122,16 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ + {QMI_FIXED_INTF(0x1435, 0x0918, 3)}, /* Wistron NeWeb D16Q1 */ + {QMI_FIXED_INTF(0x1435, 0x0918, 4)}, /* Wistron NeWeb D16Q1 */ + {QMI_FIXED_INTF(0x1435, 0x0918, 5)}, /* Wistron NeWeb D16Q1 */ + {QMI_FIXED_INTF(0x1435, 0x3185, 4)}, /* Wistron NeWeb M18Q5 */ + {QMI_FIXED_INTF(0x1435, 0xd111, 4)}, /* M9615A DM11-1 D51QC */ {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ + {QMI_FIXED_INTF(0x1435, 0xd182, 4)}, /* Wistron NeWeb D18 */ + {QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */ {QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */ {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ @@ -1180,6 +1187,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0265, 4)}, /* ONDA MT8205 4G LTE */ {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ + {QMI_FIXED_INTF(0x19d2, 0x0396, 3)}, /* ZTE ZM8620 */ {QMI_FIXED_INTF(0x19d2, 0x0412, 4)}, /* Telewell TW-LTE 4G */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ @@ -1200,7 +1208,9 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1425, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ + {QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ + {QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ -- GitLab From d2532e27b5638bb2e2dd52b80b7ea2ec65135377 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:26 -0500 Subject: [PATCH 1597/1861] arm64: add sysfs vulnerability show for spectre-v2 Track whether all the cores in the machine are vulnerable to 
Spectre-v2, and whether all the vulnerable cores have been mitigated. We then expose this information to userspace via sysfs. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a9c3ad4f79486..d2bbafa04b3c7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -512,6 +512,10 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) +/* Track overall mitigation state. We are only mitigated if all cores are ok */ +static bool __hardenbp_enab = true; +static bool __spectrev2_safe = true; + /* * List of CPUs that do not need any Spectre-v2 mitigation at all. */ @@ -522,6 +526,10 @@ static const struct midr_range spectre_v2_safe_list[] = { { /* sentinel */ } }; +/* + * Track overall bp hardening for all heterogeneous cores in the machine. + * We are only considered "safe" if all booted cores are known safe. + */ static bool __maybe_unused check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) { @@ -543,6 +551,8 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) if (!need_wa) return false; + __spectrev2_safe = false; + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); __hardenbp_enab = false; @@ -552,11 +562,14 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) /* forced off */ if (__nospectre_v2) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); + __hardenbp_enab = false; return false; } - if (need_wa < 0) + if (need_wa < 0) { pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + __hardenbp_enab = false; + } return (need_wa > 0); } @@ -779,3 +792,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, { return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__spectrev2_safe) + return sprintf(buf, "Not affected\n"); + + if (__hardenbp_enab) + return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + + return sprintf(buf, "Vulnerable\n"); +} -- GitLab From d42281b6e49510f078ace15a8ea10f71e6262581 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:27 -0500 Subject: [PATCH 1598/1861] arm64: Always enable ssb vulnerability detection Ensure we are always able to detect whether or not the CPU is affected by SSB, so that we can later advertise this to userspace. 
Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren [will: Use IS_ENABLED instead of #ifdef] Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ---- arch/arm64/kernel/cpu_errata.c | 9 +++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e505e1fbd2b93..6ccdc97e5d6a8 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -638,11 +638,7 @@ static inline int arm64_get_ssbd_state(void) #endif } -#ifdef CONFIG_ARM64_SSBD void arm64_set_ssbd_mitigation(bool state); -#else -static inline void arm64_set_ssbd_mitigation(bool state) {} -#endif extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d2bbafa04b3c7..b6132783e8cfd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -275,7 +275,6 @@ static int detect_harden_bp_fw(void) return 1; } -#ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; @@ -348,6 +347,11 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { + pr_info_once("SSBD disabled by kernel configuration\n"); + return; + } + if (this_cpu_has_cap(ARM64_SSBS)) { if (state) asm volatile(SET_PSTATE_SSBS(0)); @@ -467,7 +471,6 @@ out_printmsg: return required; } -#endif /* CONFIG_ARM64_SSBD */ static void __maybe_unused cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) @@ -759,14 +762,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif -#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypass Disable", .capability = ARM64_SSBD, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, }, -#endif #ifdef CONFIG_ARM64_ERRATUM_1188873 { /* Cortex-A76 r0p0 to r2p0 */ -- GitLab From b987222654f84f7b4ca95b3a55eca784cb30235b Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 4 Apr 2019 23:59:25 +0200 Subject: [PATCH 1599/1861] tracing: Fix buffer_ref pipe ops This fixes multiple issues in buffer_pipe_buf_ops: - The ->steal() handler must not return zero unless the pipe buffer has the only reference to the page. But generic_pipe_buf_steal() assumes that every reference to the pipe is tracked by the page's refcount, which isn't true for these buffers - buffer_pipe_buf_get(), which duplicates a buffer, doesn't touch the page's refcount. Fix it by using generic_pipe_buf_nosteal(), which refuses every attempted theft. It should be easy to actually support ->steal, but the only current users of pipe_buf_steal() are the virtio console and FUSE, and they also only use it as an optimization. So it's probably not worth the effort. - The ->get() and ->release() handlers can be invoked concurrently on pipe buffers backed by the same struct buffer_ref. Make them safe against concurrency by using refcount_t. - The pointers stored in ->private were only zeroed out when the last reference to the buffer_ref was dropped. As far as I know, this shouldn't be necessary anyway, but if we do it, let's always do it. 
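The concurrency part of the fix boils down to the standard refcount_t get/put shape; a stripped-down sketch, with free_backing_page() standing in for the real ring-buffer page release:

#include <linux/refcount.h>

struct ref_sketch {
        refcount_t refcount;
        void *page;
};

static void ref_sketch_get(struct ref_sketch *r)
{
        refcount_inc(&r->refcount);     /* warns and saturates instead of overflowing */
}

static void ref_sketch_put(struct ref_sketch *r)
{
        if (refcount_dec_and_test(&r->refcount))
                free_backing_page(r->page);     /* hypothetical release helper */
}

Unlike a plain int, refcount_t makes the decrement-and-test atomic, so concurrent ->get() and ->release() calls on the same buffer_ref cannot double-free the page.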
Link: http://lkml.kernel.org/r/20190404215925.253531-1-jannh@google.com Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Al Viro Cc: stable@vger.kernel.org Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Signed-off-by: Jann Horn Signed-off-by: Steven Rostedt (VMware) --- fs/splice.c | 4 ++-- include/linux/pipe_fs_i.h | 1 + kernel/trace/trace.c | 28 ++++++++++++++-------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 3ee7e82df48f2..e75807380caa9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; -static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return 1; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 787d224ff43e1..a830e9a00eb92 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -174,6 +174,7 @@ void free_pipe_info(struct pipe_inode_info *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21153e64bf1c3..0cfa13a600863 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7025,19 +7025,23 @@ struct buffer_ref { struct ring_buffer *buffer; void *page; int cpu; - int ref; + refcount_t refcount; }; +static void buffer_ref_release(struct buffer_ref *ref) +{ + if (!refcount_dec_and_test(&ref->refcount)) + return; + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); + kfree(ref); +} + static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); buf->private = 0; } @@ -7046,14 +7050,14 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, { struct buffer_ref *ref = (struct buffer_ref *)buf->private; - ref->ref++; + refcount_inc(&ref->refcount); } /* Pipe buffer operations for a buffer. 
*/ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, - .steal = generic_pipe_buf_steal, + .steal = generic_pipe_buf_nosteal, .get = buffer_pipe_buf_get, }; @@ -7066,11 +7070,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) struct buffer_ref *ref = (struct buffer_ref *)spd->partial[i].private; - if (--ref->ref) - return; - - ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); - kfree(ref); + buffer_ref_release(ref); spd->partial[i].private = 0; } @@ -7125,7 +7125,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - ref->ref = 1; + refcount_set(&ref->refcount, 1); ref->buffer = iter->trace_buffer->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); if (IS_ERR(ref->page)) { -- GitLab From 91862cc7867bba4ee5c8fcf0ca2f1d30427b6129 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 19 Apr 2019 21:22:59 -0500 Subject: [PATCH 1600/1861] tracing: Fix a memory leak by early error exit in trace_pid_write() In trace_pid_write(), the buffer for trace parser is allocated through kmalloc() in trace_parser_get_init(). Later on, after the buffer is used, it is then freed through kfree() in trace_parser_put(). However, it is possible that trace_pid_write() is terminated due to unexpected errors, e.g., ENOMEM. In that case, the allocated buffer will not be freed, which is a memory leak bug. To fix this issue, free the allocated buffer when an error is encountered. Link: http://lkml.kernel.org/r/1555726979-15633-1-git-send-email-wang6495@umn.edu Fixes: f4d34a87e9c10 ("tracing: Use pid bitmap instead of a pid array for set_event_pid") Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0cfa13a600863..46f68fad63739 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -496,8 +496,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * not modified. */ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); - if (!pid_list) + if (!pid_list) { + trace_parser_put(&parser); return -ENOMEM; + } pid_list->pid_max = READ_ONCE(pid_max); @@ -507,6 +509,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); if (!pid_list->pids) { + trace_parser_put(&parser); kfree(pid_list); return -ENOMEM; } -- GitLab From d6097c9e4454adf1f8f2c9547c2fa6060d55d952 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Apr 2019 22:03:18 +0200 Subject: [PATCH 1601/1861] trace: Fix preempt_enable_no_resched() abuse Unless the very next line is schedule(), or implies it, one must not use preempt_enable_no_resched(). It can cause a preemption to go missing and thereby cause arbitrary delays, breaking the PREEMPT=y invariant. 
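A kernel-style sketch of the rule being enforced (illustrative fragments under the assumption of a PREEMPT=y kernel, not the patched code itself): the _no_resched variants skip the check for a reschedule that became pending while preemption was off, so they are only safe when a schedule point immediately follows.

    /* OK: the preemption point skipped by _no_resched() is
     * immediately restored by the explicit schedule(). */
    preempt_disable();
    do_work();
    preempt_enable_no_resched();
    schedule();

    /* Everywhere else, use the variant that honours a pending
     * reschedule -- as the hunk below does with the _notrace pair. */
    preempt_disable_notrace();
    time = rb_time_stamp(buffer);
    preempt_enable_notrace();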
Link: http://lkml.kernel.org/r/20190423200318.GY14281@hirez.programming.kicks-ass.net Cc: Waiman Long Cc: Linus Torvalds Cc: Ingo Molnar Cc: Will Deacon Cc: Thomas Gleixner Cc: the arch/x86 maintainers Cc: Davidlohr Bueso Cc: Tim Chen Cc: huang ying Cc: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: stable@vger.kernel.org Fixes: 2c2d7329d8af ("tracing/ftrace: use preempt_enable_no_resched_notrace in ring_buffer_time_stamp()") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41b6f96e53662..4ee8d8aa3d0fd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } -- GitLab From 4e43df38a2e6c876d3c8ecc4196ed67a895c425d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 24 Apr 2019 22:18:53 +0200 Subject: [PATCH 1602/1861] genetlink: use idr_alloc_cyclic for family->id assignment When allocating the next family->id it makes more sense to use idr_alloc_cyclic to avoid re-using a previously used family->id as much as possible. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f0ec068e1d02f..cb69d35c8e6ad 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -362,8 +362,8 @@ int genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; - family->id = idr_alloc(&genl_fam_idr, family, - start, end + 1, GFP_KERNEL); + family->id = idr_alloc_cyclic(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; goto errout_free; -- GitLab From fdfdf86720a34527f777cbe0d8599bf0528fa146 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 25 Apr 2019 00:33:00 +0200 Subject: [PATCH 1603/1861] net: phy: marvell: Fix buffer overrun with stats counters marvell_get_sset_count() returns how many statistics counters there are. If the PHY supports fibre, there are 3, otherwise two. marvell_get_strings() does not make this distinction, and always returns 3 strings. This then often results in writing past the end of the buffer for the strings. Fixes: 2170fef78a40 ("Marvell phy: add field to get errors from fiber link.") Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/marvell.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 3ccba37bd6dde..f76c4048b9780 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1489,9 +1489,10 @@ static int marvell_get_sset_count(struct phy_device *phydev) static void marvell_get_strings(struct phy_device *phydev, u8 *data) { + int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) { + for (i = 0; i < count; i++) { strlcpy(data + i * ETH_GSTRING_LEN, marvell_hw_stats[i].string, ETH_GSTRING_LEN); } @@ -1519,9 +1520,10 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i) static void marvell_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data) { + int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) + for (i = 0; i < count; i++) data[i] = marvell_get_stat(phydev, i); } -- GitLab From 89c02e69fc5245f8a2f34b58b42d43a737af1a5e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 25 Apr 2019 22:23:37 -0700 Subject: [PATCH 1604/1861] mm/memory_hotplug.c: drop memory device reference after find_memory_block() Right now we are using find_memory_block() to get the node id for the pfn range to online. We fail to drop the reference to the memory block device. While the device still gets unregistered via device_unregister(), resulting in no user visible problem, the device is never released via device_release(), resulting in a memory leak. Fix that by properly using a put_device(). Link: http://lkml.kernel.org/r/20190411110955.1430-1-david@redhat.com Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug") Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Reviewed-by: Wei Yang Acked-by: Michal Hocko Acked-by: Pankaj Gupta Cc: David Hildenbrand Cc: Pavel Tatashin Cc: Qian Cai Cc: Arun KS Cc: Mathieu Malaterre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0082d699be94b..b236069ff0d82 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -874,6 +874,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ */ mem = find_memory_block(__pfn_to_section(pfn)); nid = mem->nid; + put_device(&mem->dev); /* associate pfn range with the zone */ zone = move_pfn_range(online_type, nid, pfn, nr_pages); -- GitLab From e153abc0739ff77bd89c9ba1688cdb963464af97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 25 Apr 2019 22:23:41 -0700 Subject: [PATCH 1605/1861] zram: pass down the bvec we need to read into in the work struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When scheduling a work item to read a page, we need to pass down the proper bvec struct which points to the page to read into. Before this patch it used a randomly initialized bvec (only if PAGE_SIZE != 4096), which is wrong. Note that without this patch, on an arch/kernel combination where PAGE_SIZE != 4096, userspace could read random memory through a zram block device (though userspace probably would have no control over the address being read).
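The underlying rule generalises beyond zram: anything a deferred worker dereferences must outlive the scheduling function's stack frame, so it belongs inside the work item itself. A userspace analogue with pthreads (hypothetical names, illustration only):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct work {
            int payload;            /* copied in, like zram_work.bvec above */
    };

    static void *worker(void *arg)
    {
            struct work *w = arg;   /* safe: w lives on the heap, not on the
                                     * scheduler's (possibly dead) stack */
            printf("payload = %d\n", w->payload);
            free(w);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            struct work *w = malloc(sizeof(*w));

            if (!w)
                    return 1;
            w->payload = 42;        /* copy the data, don't point at locals */
            pthread_create(&t, NULL, worker, w);
            pthread_join(&t, NULL);
            return 0;
    }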
Link: http://lkml.kernel.org/r/20190408183219.26377-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: Andrew Morton Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Nitin Gupta Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 399cad7daae77..d58a359a66225 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -774,18 +774,18 @@ struct zram_work { struct zram *zram; unsigned long entry; struct bio *bio; + struct bio_vec bvec; }; #if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { - struct bio_vec bvec; struct zram_work *zw = container_of(work, struct zram_work, work); struct zram *zram = zw->zram; unsigned long entry = zw->entry; struct bio *bio = zw->bio; - read_from_bdev_async(zram, &bvec, entry, bio); + read_from_bdev_async(zram, &zw->bvec, entry, bio); } /* @@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, { struct zram_work work; + work.bvec = *bvec; work.zram = zram; work.entry = entry; work.bio = bio; -- GitLab From ae3d6a323347940f0548bbb4b17f0bb2e9164169 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 25 Apr 2019 22:23:44 -0700 Subject: [PATCH 1606/1861] lib/Kconfig.debug: fix build error without CONFIG_BLOCK If CONFIG_TEST_KMOD is set to M while CONFIG_BLOCK is not set, XFS and BTRFS cannot be compiled successfully. Link: http://lkml.kernel.org/r/20190410075434.35220-1-yuehaibing@huawei.com Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: YueHaibing Reported-by: Hulk Robot Reviewed-by: Kees Cook Cc: Masahiro Yamada Cc: Petr Mladek Cc: Andy Shevchenko Cc: Matthew Wilcox Cc: Joe Lawrence Cc: Robin Murphy Cc: Luis Chamberlain Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 00dbcdbc9a0d3..d5a4a4036d2f8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1929,6 +1929,7 @@ config TEST_KMOD depends on m depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN + depends on BLOCK select TEST_LKM select XFS_FS select TUN -- GitLab From e789803507b2e154ed452865ee981b038654e98d Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Thu, 25 Apr 2019 22:23:47 -0700 Subject: [PATCH 1607/1861] lib/test_vmalloc.c: do not create cpumask_t variable on stack On my "Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz" system (12 CPUs) I get a compiler warning about the frame size: warning: the frame size of 1096 bytes is larger than 1024 bytes [-Wframe-larger-than=] The size of cpumask_t depends on the number of CPUs, so just pass cpumask_of() to set_cpus_allowed_ptr() as the second argument.
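The saving comes from cpumask_t being a fixed NR_CPUS-bit bitmap, so an on-stack mask costs e.g. 1 KiB with NR_CPUS=8192 regardless of how many CPUs are present; cpumask_of(cpu) instead returns a pointer to a constant single-CPU mask. Kernel-style before/after fragments (a sketch, not the full test_func()):

    /* before: the mask lives on the stack and scales with NR_CPUS */
    cpumask_t newmask = CPU_MASK_NONE;
    cpumask_set_cpu(cpu, &newmask);
    set_cpus_allowed_ptr(current, &newmask);

    /* after: constant per-CPU mask, zero stack cost, and the
     * return value is actually checked */
    if (set_cpus_allowed_ptr(current, cpumask_of(cpu)) < 0)
            pr_err("Failed to set affinity to %d CPU\n", cpu);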
Link: http://lkml.kernel.org/r/20190418193925.9361-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Andrew Morton Reviewed-by: Roman Gushchin Cc: Uladzislau Rezki Cc: Michal Hocko Cc: Matthew Wilcox Cc: Thomas Garnier Cc: Oleksiy Avramchenko Cc: Steven Rostedt Cc: Joel Fernandes Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_vmalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 83cdcaa82bf6c..f832b095afba0 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -383,14 +383,14 @@ static void shuffle_array(int *arr, int n) static int test_func(void *private) { struct test_driver *t = private; - cpumask_t newmask = CPU_MASK_NONE; int random_array[ARRAY_SIZE(test_case_array)]; int index, i, j, ret; ktime_t kt; u64 delta; - cpumask_set_cpu(t->cpu, &newmask); - set_cpus_allowed_ptr(current, &newmask); + ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu)); + if (ret < 0) + pr_err("Failed to set affinity to %d CPU\n", t->cpu); for (i = 0; i < ARRAY_SIZE(test_case_array); i++) random_array[i] = i; -- GitLab From 24512228b7a3f412b5a51f189df302616b021c33 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Apr 2019 22:23:51 -0700 Subject: [PATCH 1608/1861] mm: do not boost watermarks to avoid fragmentation for the DISCONTIG memory model Mikulas Patocka reported that commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") "broke" memory management on parisc. The machine is not NUMA but the DISCONTIG model creates three pgdats even though it's a UMA machine, for the following ranges: 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB Mikulas reported: With the patch 1c30844d2, the kernel will incorrectly reclaim the first zone when it fills up, ignoring the fact that there are two completely free zones. Basically, it limits cache size to 1GiB. For example, if I run: # dd if=/dev/sda of=/dev/null bs=1M count=2048 - with the proper kernel, there should be "Buffers - 2GiB" when this command finishes. With the patch 1c30844d2, buffers will consume just 1GiB or slightly more, because the kernel was incorrectly reclaiming them. The page allocator and reclaim make assumptions that pgdats really represent NUMA nodes and zones represent ranges, and make decisions on that basis. Watermark boosting for small pgdats leads to unexpected results even though this would have behaved reasonably on SPARSEMEM. DISCONTIG is essentially deprecated and even parisc plans to move to SPARSEMEM, so there is no need to be fancy; this patch simply disables watermark boosting by default on DISCONTIGMEM.
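The tunable stays user-visible either way; a small userspace sketch to read and interpret it (assuming the documented /proc/sys/vm path):

    #include <stdio.h>

    int main(void)
    {
            unsigned int factor;
            FILE *f = fopen("/proc/sys/vm/watermark_boost_factor", "r");

            if (!f)
                    return 1;
            if (fscanf(f, "%u", &factor) == 1)
                    /* unit is fractions of 10,000: 15000 means boosting up
                     * to 150% of the high watermark, 0 disables the feature
                     * (the new default on DISCONTIGMEM) */
                    printf("boost factor %u => %u%% of high watermark\n",
                           factor, factor / 100);
            fclose(f);
            return 0;
    }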
Link: http://lkml.kernel.org/r/20190419094335.GJ18914@techsingularity.net Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Mel Gorman Reported-by: Mikulas Patocka Tested-by: Mikulas Patocka Acked-by: Vlastimil Babka Cc: James Bottomley Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 16 ++++++++-------- mm/page_alloc.c | 13 +++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 6af24cdb25ccb..3f13d8599337e 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to increase the success rate of future high-order allocations such as SLUB allocations, THP and hugetlbfs pages. -To make it sensible with respect to the watermark_scale_factor parameter, -the unit is in fractions of 10,000. The default value of 15,000 means -that up to 150% of the high watermark will be reclaimed in the event of -a pageblock being mixed due to fragmentation. The level of reclaim is -determined by the number of fragmentation events that occurred in the -recent past. If this value is smaller than a pageblock then a pageblocks -worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor -of 0 will disable the feature. +To make it sensible with respect to the watermark_scale_factor +parameter, the unit is in fractions of 10,000. The default value of +15,000 on !DISCONTIGMEM configurations means that up to 150% of the high +watermark will be reclaimed in the event of a pageblock being mixed due +to fragmentation. The level of reclaim is determined by the number of +fragmentation events that occurred in the recent past. If this value is +smaller than a pageblock then a pageblocks worth of pages will be reclaimed +(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature. ============================================================= diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c6ce20aaf80bb..6c6d9f1c404e0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_DISCONTIGMEM +/* + * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges + * are not on separate NUMA nodes. Functionally this works but with + * watermark_boost_factor, it can reclaim prematurely as the ranges can be + * quite small. By default, do not boost watermarks on discontigmem as in + * many cases very high-order allocations like THP are likely to be + * unsupported and the premature reclaim offsets the advantage of long-term + * fragmentation avoidance. + */ +int watermark_boost_factor __read_mostly; +#else int watermark_boost_factor __read_mostly = 15000; +#endif int watermark_scale_factor = 10; static unsigned long nr_kernel_pages __initdata; -- GitLab From ee8ab0eeb49bd3982090c8f14dc9cc65bcd13c5c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 25 Apr 2019 22:23:54 -0700 Subject: [PATCH 1609/1861] mm, page_alloc: always use a captured page regardless of compaction result During the development of commit 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction"), a paranoid check was added to ensure that if a captured page was available after compaction that it was consistent with the final state of compaction. 
The intent was to catch serious programming bugs such as using a stale page pointer and causing corruption problems. However, it is possible to get a captured page even if compaction was unsuccessful if an interrupt triggered and happened to free pages in interrupt context that got merged into a suitable high-order page. It's highly unlikely, but Li Wang did report the following warning on s390 occurring when testing OOM handling. Note that the warning is slightly edited for clarity. WARNING: CPU: 0 PID: 9783 at mm/page_alloc.c:3777 __alloc_pages_direct_compact+0x182/0x190 Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc pkey ghash_s390 prng xts aes_s390 des_s390 des_generic sha512_s390 zcrypt_cex4 zcrypt vmur binfmt_misc ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth qdio lcs ctcm ccwgroup fsm dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 9783 Comm: copy.sh Kdump: loaded Not tainted 5.1.0-rc5 #1 This patch simply removes the check entirely instead of trying to be clever about pages freed from interrupt context. If a serious programming error was introduced, it is highly likely to be caught by prep_new_page() instead. Link: http://lkml.kernel.org/r/20190419085133.GH18914@techsingularity.net Fixes: 5e1f0f098b46 ("mm, compaction: capture a page under direct compaction") Signed-off-by: Mel Gorman Reported-by: Li Wang Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6c6d9f1c404e0..d167c48d913cb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3786,11 +3786,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); - if (*compact_result <= COMPACT_INACTIVE) { - WARN_ON_ONCE(page); - return NULL; - } - /* * At least in one zone compaction wasn't deferred or skipped, so let's * count a compaction stall -- GitLab From 8139ad043d632c0e9e12d760068a7a8e91659aa1 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 25 Apr 2019 22:23:58 -0700 Subject: [PATCH 1610/1861] mm/page_alloc.c: avoid potential NULL pointer dereference ac.preferred_zoneref->zone passed to alloc_flags_nofragment() can be NULL. The 'zone' pointer is unconditionally dereferenced in alloc_flags_nofragment(). Bail out on a NULL zone to avoid a potential crash. Currently we don't see any crashes only because alloc_flags_nofragment() has another bug which allows the compiler to optimize away all accesses to 'zone'.
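Reduced to its shape (stub code, not the real mm internals): return the flags accumulated so far the moment the zone is absent, before anything dereferences it.

    struct zone;    /* opaque here; only the NULL check matters */

    static unsigned int alloc_flags_for(struct zone *zone, unsigned int flags)
    {
            if (!zone)
                    return flags;   /* bail out before zone_idx(zone) and
                                     * friends can dereference a NULL zone */
            /* zone-dependent checks would continue past this point */
            return flags;
    }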
Link: http://lkml.kernel.org/r/20190423120806.3503-1-aryabinin@virtuozzo.com Fixes: 6bb154504f8b ("mm, page_alloc: spread allocations across zones before introducing fragmentation") Signed-off-by: Andrey Ryabinin Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d167c48d913cb..9992ca7f29f1e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3432,6 +3432,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) alloc_flags |= ALLOC_KSWAPD; #ifdef CONFIG_ZONE_DMA32 + if (!zone) + return alloc_flags; + if (zone_idx(zone) != ZONE_NORMAL) goto out; -- GitLab From 8118b82eb756e271929697e8ada5f637dc443af1 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 25 Apr 2019 22:24:01 -0700 Subject: [PATCH 1611/1861] mm/page_alloc.c: fix never set ALLOC_NOFRAGMENT flag Commit 0a79cdad5eb2 ("mm: use alloc_flags to record if kswapd can wake") removed setting of the ALLOC_NOFRAGMENT flag. Bring it back. The runtime effect is that ALLOC_NOFRAGMENT behaviour is restored so that allocations are spread across local zones to avoid fragmentation due to mixing pageblocks as long as possible. Link: http://lkml.kernel.org/r/20190423120806.3503-2-aryabinin@virtuozzo.com Fixes: 0a79cdad5eb2 ("mm: use alloc_flags to record if kswapd can wake") Signed-off-by: Andrey Ryabinin Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9992ca7f29f1e..c02cff1ed56eb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3436,7 +3436,7 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) return alloc_flags; if (zone_idx(zone) != ZONE_NORMAL) - goto out; + return alloc_flags; /* * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and @@ -3445,9 +3445,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) */ BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); if (nr_online_nodes > 1 && !populated_zone(--zone)) - goto out; + return alloc_flags; -out: + alloc_flags |= ALLOC_NOFRAGMENT; #endif /* CONFIG_ZONE_DMA32 */ return alloc_flags; } -- GitLab From 89189557b47b35683a27c80ee78aef18248eefb4 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 25 Apr 2019 22:24:05 -0700 Subject: [PATCH 1612/1861] fs/proc/proc_sysctl.c: Fix a NULL pointer dereference Syzkaller report this: sysctl could not get directory: /net//bridge -12 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 7027 Comm: syz-executor.0 Tainted: G C 5.1.0-rc3+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:__write_once_size include/linux/compiler.h:220 [inline] RIP: 0010:__rb_change_child include/linux/rbtree_augmented.h:144 [inline] RIP: 0010:__rb_erase_augmented include/linux/rbtree_augmented.h:186 [inline] RIP: 0010:rb_erase+0x5f4/0x19f0 lib/rbtree.c:459 Code: 00 0f 85 60 13 00 00 48 89 1a 48 83 c4 18 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 0c 00 00 4d 85 ed 4c 89 2e 74 ce 4c 89 ea 48 RSP: 0018:ffff8881bb507778 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: ffff8881f224b5b8 RCX: ffffffff818f3f6a RDX: 000000000000000a RSI: 0000000000000050 RDI: ffff8881f224b568 RBP: 0000000000000000 R08: ffffed10376a0ef4 R09: 
ffffed10376a0ef4 R10: 0000000000000001 R11: ffffed10376a0ef4 R12: ffff8881f224b558 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f3e7ce13700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd60fbe9398 CR3: 00000001cb55c001 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: erase_entry fs/proc/proc_sysctl.c:178 [inline] erase_header+0xe3/0x160 fs/proc/proc_sysctl.c:207 start_unregistering fs/proc/proc_sysctl.c:331 [inline] drop_sysctl_table+0x558/0x880 fs/proc/proc_sysctl.c:1631 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 br_netfilter_init+0x68/0x1000 [br_netfilter] do_one_initcall+0xbc/0x47d init/main.c:901 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Modules linked in: br_netfilter(+) backlight comedi(C) hid_sensor_hub max3100 ti_ads8688 udc_core fddi snd_mona leds_gpio rc_streamzap mtd pata_netcell nf_log_common rc_winfast udp_tunnel snd_usbmidi_lib snd_usb_toneport snd_usb_line6 snd_rawmidi snd_seq_device snd_hwdep videobuf2_v4l2 videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops rc_gadmei_rm008z 8250_of smm665 hid_tmff hid_saitek hwmon_vid rc_ati_tv_wonder_hd_600 rc_core pata_pdc202xx_old dn_rtmsg as3722 ad714x_i2c ad714x snd_soc_cs4265 hid_kensington panel_ilitek_ili9322 drm drm_panel_orientation_quirks ipack cdc_phonet usbcore phonet hid_jabra hid extcon_arizona can_dev industrialio_triggered_buffer kfifo_buf industrialio adm1031 i2c_mux_ltc4306 i2c_mux ipmi_msghandler mlxsw_core snd_soc_cs35l34 snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer ac97_bus snd_compress snd soundcore gpio_da9055 uio ecdh_generic mdio_thunder of_mdio fixed_phy libphy mdio_cavium iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev tpm kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel ide_pci_generic piix aes_x86_64 crypto_simd cryptd ide_core glue_helper input_leds psmouse intel_agp intel_gtt serio_raw ata_generic i2c_piix4 agpgart pata_acpi parport_pc parport floppy rtc_cmos sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: br_netfilter] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 68741688d5fbfe85 ]--- commit 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links") forgot to handle start_unregistering() case, while header->parent is NULL, it calls erase_header() and as seen in the above syzkaller call trace, accessing &header->parent->root will trigger a NULL pointer dereference. As that commit explained, there is also no need to call start_unregistering() if header->parent is NULL. 
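The shape of the fix, as a sketch with hypothetical names (not the real fs/proc code): put_links() and start_unregistering() both end up dereferencing header->parent -- the latter through erase_header(), which touches &header->parent->root -- so the two calls must sit under a single parent check.

    static void drop_table(struct header *h)
    {
            if (--h->nreg)
                    return;

            if (h->parent) {                /* guard BOTH parent users */
                    put_links(h);
                    start_unregistering(h); /* -> erase_header() -> parent->root */
            }

            if (!--h->count)
                    free_header(h);
    }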
Link: http://lkml.kernel.org/r/20190409153622.28112-1-yuehaibing@huawei.com Fixes: 23da9588037e ("fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links") Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing Reported-by: Hulk Robot Reviewed-by: Kees Cook Cc: Luis Chamberlain Cc: Alexey Dobriyan Cc: Al Viro Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d653907275419..7325baa8f9d47 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - if (parent) + if (parent) { put_links(header); - start_unregistering(header); + start_unregistering(header); + } + if (!--header->count) kfree_rcu(header, rcu); -- GitLab From 3a349763cf11e63534b8f2d302f2d0c790566497 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 26 Apr 2019 17:22:01 -0700 Subject: [PATCH 1613/1861] Input: synaptics-rmi4 - write config register values to the right offset Currently any changed config register values don't take effect, as the function to write them back is called with the wrong register offset. Fixes: ff8f83708b3e (Input: synaptics-rmi4 - add support for 2D sensors and F11) Signed-off-by: Lucas Stach Reviewed-by: Philipp Zabel Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index df64d6aed4f7e..93901ebd122a5 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi_function *fn) } rc = f11_write_control_regs(fn, &f11->sens_query, - &f11->dev_controls, fn->fd.query_base_addr); + &f11->dev_controls, fn->fd.control_base_addr); if (rc) dev_warn(&fn->dev, "Failed to write control registers\n"); -- GitLab From baf76f0c58aec435a3a864075b8f6d8ee5d1f17e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Apr 2019 16:13:58 -0700 Subject: [PATCH 1614/1861] slip: make slhc_free() silently accept an error pointer This way, slhc_free() accepts what slhc_init() returns, whether that is an error or not. In particular, the pattern in sl_alloc_bufs() is slcomp = slhc_init(16, 16); ... slhc_free(slcomp); for the error handling path, and rather than complicate that code, just make it ok to always free what was returned by the init function. That's what the code used to do before commit 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters completely") when slhc_init() just returned NULL for the error case, with no actual indication of the details of the error. 
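The idiom is worth spelling out; a self-contained userspace re-creation (ERR_PTR/IS_ERR_OR_NULL are simplified copies of the kernel macros, and the types are hypothetical): the destructor accepts whatever the constructor returned, so "free what init gave you" is correct on both the success and the error path.

    #include <stdlib.h>

    #define MAX_ERRNO       4095
    #define ERR_PTR(err)    ((void *)(long)(err))
    #define IS_ERR_OR_NULL(p) \
            (!(p) || (unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

    struct comp { int slots; };

    static struct comp *comp_init(int slots)
    {
            struct comp *c;

            if (slots < 0 || slots > 255)
                    return ERR_PTR(-22);    /* -EINVAL, error details kept */
            c = malloc(sizeof(*c));
            if (!c)
                    return ERR_PTR(-12);    /* -ENOMEM */
            c->slots = slots;
            return c;
    }

    static void comp_free(struct comp *c)
    {
            if (IS_ERR_OR_NULL(c))          /* accepts init's error returns */
                    return;
            free(c);
    }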
Reported-by: syzbot+45474c076a4927533d2e@syzkaller.appspotmail.com Fixes: 4ab42d78e37a ("ppp, slip: Validate VJ compression slot parameters completely") Acked-by: Ben Hutchings Cc: David Miller Signed-off-by: Linus Torvalds --- drivers/net/slip/slhc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index f4e93f5fc2043..ea90db3c77058 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -153,7 +153,7 @@ out_fail: void slhc_free(struct slcompress *comp) { - if ( comp == NULLSLCOMPR ) + if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) -- GitLab From f2fde6a5bcfcfcfca8eef59666b20454da055ad7 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Fri, 26 Apr 2019 17:23:58 -0700 Subject: [PATCH 1615/1861] KVM: VMX: Move RSB stuffing to before the first RET after VM-Exit The not-so-recent change to move VMX's VM-Exit handling to a dedicated "function" unintentionally exposed KVM to a speculative attack from the guest by executing a RET prior to stuffing the RSB. Make RSB stuffing happen immediately after VM-Exit, before any unpaired returns. Alternatively, the VM-Exit path could postpone full RSB stuffing until its current location by stuffing the RSB only as needed, or by avoiding returns in the VM-Exit path entirely, but both alternatives are beyond ugly since vmx_vmexit() has multiple indirect callers (by way of vmx_vmenter()). And putting the RSB stuffing immediately after VM-Exit makes it much less likely to be re-broken in the future. Note, the cost of PUSH/POP could be avoided in the normal flow by pairing the PUSH RAX with the POP RAX in __vmx_vcpu_run() and adding a POP to nested_vmx_check_vmentry_hw(), but such a weird/subtle dependency is likely to cause problems in the long run, and PUSH/POP will take all of a few cycles, which is peanuts compared to the number of cycles required to fill the RSB. Fixes: 453eafbe65f7 ("KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines") Reported-by: Rick Edgecombe Signed-off-by: Rick Edgecombe Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmenter.S | 12 ++++++++++++ arch/x86/kvm/vmx/vmx.c | 3 --- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 7b272738c5768..d4cb1945b2e3b 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -3,6 +3,7 @@ #include #include #include +#include #define WORD_SIZE (BITS_PER_LONG / 8) @@ -77,6 +78,17 @@ ENDPROC(vmx_vmenter) * referred to by VMCS.HOST_RIP. */ ENTRY(vmx_vmexit) +#ifdef CONFIG_RETPOLINE + ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE + /* Preserve guest's RAX, it's used to stuff the RSB. */ + push %_ASM_AX + + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET!
*/ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + + pop %_ASM_AX +.Lvmexit_skip_rsb: +#endif ret ENDPROC(vmx_vmexit) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index bcdd69d90a8c3..0c955bb286fff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6462,9 +6462,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) current_evmcs->hv_clean_fields |= -- GitLab From b4e30e8e7ea1d1e35ffd64ca46f7d9a7f227b4bf Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:50 -0400 Subject: [PATCH 1616/1861] bnxt_en: Improve multicast address setup logic. The driver builds a list of multicast addresses and sends it to the firmware when the driver's ndo_set_rx_mode() is called. In rare cases, the firmware can fail this call if internal resources to add multicast addresses are exhausted. In that case, we should try the call again by setting the ALL_MCAST flag which is more guaranteed to succeed. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4c586ba4364ba..42fd273d457ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8961,8 +8961,15 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp) skip_uc: rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + if (rc && vnic->mc_list_count) { + netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", + rc); + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + } if (rc) - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", + netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n", rc); return rc; -- GitLab From f9099d611449836a51a65f40ea7dc9cb5f2f665e Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 25 Apr 2019 22:31:51 -0400 Subject: [PATCH 1617/1861] bnxt_en: Free short FW command HWRM memory in error path in bnxt_init_one() In the bnxt_init_one() error path, short FW command request memory is not freed. This patch fixes it. Fixes: e605db801bde ("bnxt_en: Support for Short Firmware Message") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 42fd273d457ee..5d02f5999df76 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10692,6 +10692,7 @@ init_err_cleanup_tc: bnxt_clear_int_mode(bp); init_err_pci_clean: + bnxt_free_hwrm_short_cmd_req(bp); bnxt_free_hwrm_resources(bp); bnxt_free_ctx_mem(bp); kfree(bp->ctx); -- GitLab From 1f83391bd6fc48f92f627b0ec0bce686d100c6a5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:52 -0400 Subject: [PATCH 1618/1861] bnxt_en: Fix possible crash in bnxt_hwrm_ring_free() under error conditions. 
If we encounter errors during open and proceed to clean up, bnxt_hwrm_ring_free() may crash if the rings we try to free have never been allocated. bnxt_cp_ring_for_rx() or bnxt_cp_ring_for_tx() may reference pointers that have not been allocated. Fix it by checking for valid fw_ring_id first before calling bnxt_cp_ring_for_rx() or bnxt_cp_ring_for_tx(). Fixes: 2c61d2117ecb ("bnxt_en: Add helper functions to get firmware CP ring ID.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d02f5999df76..b03669f67a0c5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5135,10 +5135,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; struct bnxt_ring_struct *ring = &txr->tx_ring_struct; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX, close_path ? cmpl_ring_id : @@ -5151,10 +5151,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_RX, close_path ? cmpl_ring_id : @@ -5173,10 +5173,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id; - cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, type, close_path ? cmpl_ring_id : INVALID_HW_RING_ID); -- GitLab From ad361adf0d08f1135f3845c6b3a36be7cc0bfda5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:53 -0400 Subject: [PATCH 1619/1861] bnxt_en: Pass correct extended TX port statistics size to firmware. If driver determines that extended TX port statistics are not supported or allocation of the data structure fails, make sure to pass 0 TX stats size to firmware to disable it. The firmware returned TX stats size should also be set to 0 for consistency. This will prevent bnxt_get_ethtool_stats() from accessing the NULL TX stats pointer in case there is mismatch between firmware and driver. Fixes: 36e53349b60b ("bnxt_en: Add additional extended port statistics.") Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b03669f67a0c5..a9172b2feb1d1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6753,6 +6753,7 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) struct hwrm_queue_pri2cos_qcfg_input req2 = {0}; struct hwrm_port_qstats_ext_input req = {0}; struct bnxt_pf_info *pf = &bp->pf; + u32 tx_stat_size; int rc; if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) @@ -6762,13 +6763,16 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) req.port_id = cpu_to_le16(pf->port_id); req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext)); req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map); - req.tx_stat_size = cpu_to_le16(sizeof(struct tx_port_stats_ext)); + tx_stat_size = bp->hw_tx_port_stats_ext ? + sizeof(*bp->hw_tx_port_stats_ext) : 0; + req.tx_stat_size = cpu_to_le16(tx_stat_size); req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map); mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8; - bp->fw_tx_stats_ext_size = le16_to_cpu(resp->tx_stat_size) / 8; + bp->fw_tx_stats_ext_size = tx_stat_size ? + le16_to_cpu(resp->tx_stat_size) / 8 : 0; } else { bp->fw_rx_stats_ext_size = 0; bp->fw_tx_stats_ext_size = 0; -- GitLab From 3f93cd3f098e284c851acb89265ebe35b994a5c8 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:54 -0400 Subject: [PATCH 1620/1861] bnxt_en: Fix statistics context reservation logic. In an earlier commit that fixes the number of stats contexts to reserve for the RDMA driver, we added a function parameter to pass in the number of stats contexts to all the relevant functions. The passed in parameter should have been used to set the enables field of the firmware message. Fixes: 780baad44f0f ("bnxt_en: Reserve 1 stat_ctx for RDMA driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a9172b2feb1d1..b6cb7b8ceab97 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5315,17 +5315,16 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, req->num_tx_rings = cpu_to_le16(tx_rings); if (BNXT_NEW_RM(bp)) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; enables |= tx_rings + ring_grps ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; } else { enables |= cp_rings ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; @@ -5365,14 +5364,13 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; enables |= rx_rings ? 
FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; + enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= tx_rings + ring_grps ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; } else { enables |= cp_rings ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; } -- GitLab From 0b397b17a4120cb80f7bf89eb30587b3dd9b0d1d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 25 Apr 2019 22:31:55 -0400 Subject: [PATCH 1621/1861] bnxt_en: Fix uninitialized variable usage in bnxt_rx_pkt(). In bnxt_rx_pkt(), if the driver encounters BD errors, it will recycle the buffers and jump to the end where the uninitailized variable "len" is referenced. Fix it by adding a new jump label that will skip the length update. This is the most correct fix since the length may not be valid when we get this type of error. Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation") Reported-by: Nathan Chancellor Cc: Nathan Chancellor Signed-off-by: Michael Chan Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b6cb7b8ceab97..52ade133b57cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1625,7 +1625,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, netdev_warn(bp->dev, "RX buffer error %x\n", rx_err); bnxt_sched_reset(bp, rxr); } - goto next_rx; + goto next_rx_no_len; } len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT; @@ -1706,12 +1706,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = 1; next_rx: - rxr->rx_prod = NEXT_RX(prod); - rxr->rx_next_cons = NEXT_RX(cons); - cpr->rx_packets += 1; cpr->rx_bytes += len; +next_rx_no_len: + rxr->rx_prod = NEXT_RX(prod); + rxr->rx_next_cons = NEXT_RX(cons); + next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; -- GitLab From 97e1caa517e22d62a283b876fb8aa5f4672c83dd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 17:35:09 -0700 Subject: [PATCH 1622/1861] net/tls: don't copy negative amounts of data in reencrypt There is no guarantee the record starts before the skb frags. If we don't check for this condition copy amount will get negative, leading to reads and writes to random memory locations. Familiar hilarity ensues. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. 
Miller --- net/tls/tls_device.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index cc0256939eb63..96357060addc5 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -628,14 +628,16 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, + rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); - if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy); - offset += copy; - buf += copy; + offset += copy; + buf += copy; + } skb_walk_frags(skb, skb_iter) { copy = min_t(int, skb_iter->len, -- GitLab From eb3d38d5adb520435d4e4af32529ccb13ccc9935 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Apr 2019 17:35:10 -0700 Subject: [PATCH 1623/1861] net/tls: fix copy to fragments in reencrypt Fragments may contain data from other records, so we have to account for that when we calculate the destination and the maximum length of the copy we can perform. Note that 'offset' is the offset within the message, so it can't be passed as the offset within the frag. Here skb_store_bits() would have realised the call is wrong and simply not copied data. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Reviewed-by: John Hurley Signed-off-by: David S. Miller --- net/tls/tls_device.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 96357060addc5..14dedb24fa7b6 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -597,7 +597,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); - int err = 0, offset = rxm->offset, copy, nsg; + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; struct sk_buff *skb_iter, *unused; struct scatterlist sg[1]; char *orig_buf, *buf; @@ -628,9 +628,10 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; + if (skb_pagelen(skb) > offset) { - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + copy = min_t(int, skb_pagelen(skb) - offset, data_len); if (skb->decrypted) skb_store_bits(skb, offset, buf, copy); @@ -639,16 +640,30 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) offset += copy; buf += copy; } + pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { - copy = min_t(int, skb_iter->len, - rxm->full_len - offset + rxm->offset - - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe.
+ */ + if (pos + skb_iter->len <= offset) goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset); if (skb_iter->decrypted) - skb_store_bits(skb_iter, offset, buf, copy); + skb_store_bits(skb_iter, frag_pos, buf, copy); offset += copy; buf += copy; +done_with_frag: + pos += skb_iter->len; } free_buf: -- GitLab From 21f1b8a6636c4dbde4aa1ec0343f42eaf653ffcc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Apr 2019 12:50:44 +0200 Subject: [PATCH 1624/1861] udp: fix GRO reception in case of length mismatch Currently, the UDP GRO code path does bad things on some edge conditions: aggregation can happen even on packets with different lengths. Fix the above by rewriting the 'complete' condition for GRO packets. While at it, note explicitly that we allow merging the first packet per burst below gso_size. Reported-by: Sean Tong Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 64f9715173ac8..d8776b2110c10 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -377,13 +377,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values + * leading to execessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. */ - if (!skb_gro_receive(p, skb) && + if (uh->len > uh2->len || skb_gro_receive(p, skb) || + uh->len != uh2->len || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; - else if (uh->len != uh2->len) - pp = p; return pp; } -- GitLab From 0700d3d117a7f110ddddbd83873e13652f69c54b Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 26 Apr 2019 16:13:54 +0800 Subject: [PATCH 1625/1861] ALSA: hda/realtek - Fixed Dell AIO speaker noise Fix Dell AIO speaker noise. Setting spec->gen.auto_mute_via_amp = 1 solves the speaker white noise at boot. Setting codec->power_save_node = 0 solves the speaker noise at resume. Fixes: 9226665159f0 ("ALSA: hda/realtek - Fix Dell AIO LineOut issue") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 070749b50eff8..7da7fa4f42016 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5450,6 +5450,8 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, return; spec->gen.preferred_dacs = preferred_pairs; + spec->gen.auto_mute_via_amp = 1; + codec->power_save_node = 0; } /* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */ -- GitLab From de1887c064b9996ac03120d90d0a909a3f678f98 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 16 Apr 2019 12:57:21 +0300 Subject: [PATCH 1626/1861] iwlwifi: mvm: check for length correctness in iwl_mvm_create_skb() We don't check for the validity of the lengths in the packet received from the firmware.
If the MPDU length received in the rx descriptor is too short to contain the header length and the crypt length together, we may end up trying to copy a negative number of bytes (headlen - hdrlen < 0) which will underflow and cause us to try to copy a huge amount of data. This causes oopses such as this one: BUG: unable to handle kernel paging request at ffff896be2970000 PGD 5e201067 P4D 5e201067 PUD 5e205067 PMD 16110d063 PTE 8000000162970161 Oops: 0003 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 1824 Comm: irq/134-iwlwifi Not tainted 4.19.33-04308-geea41cf4930f #1 Hardware name: [...] RIP: 0010:memcpy_erms+0x6/0x10 Code: 90 90 90 90 eb 1e 0f 1f 00 48 89 f8 48 89 d1 48 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 66 0f 1f 44 00 00 48 89 f8 48 89 d1 a4 c3 0f 1f 80 00 00 00 00 48 89 f8 48 83 fa 20 72 7e 40 38 fe RSP: 0018:ffffa4630196fc60 EFLAGS: 00010287 RAX: ffff896be2924618 RBX: ffff896bc8ecc600 RCX: 00000000fffb4610 RDX: 00000000fffffff8 RSI: ffff896a835e2a38 RDI: ffff896be2970000 RBP: ffffa4630196fd30 R08: ffff896bc8ecc600 R09: ffff896a83597000 R10: ffff896bd6998400 R11: 000000000200407f R12: ffff896a83597050 R13: 00000000fffffff8 R14: 0000000000000010 R15: ffff896a83597038 FS: 0000000000000000(0000) GS:ffff896be8280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff896be2970000 CR3: 000000005dc12002 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: iwl_mvm_rx_mpdu_mq+0xb51/0x121b [iwlmvm] iwl_pcie_rx_handle+0x58c/0xa89 [iwlwifi] iwl_pcie_irq_rx_msix_handler+0xd9/0x12a [iwlwifi] irq_thread_fn+0x24/0x49 irq_thread+0xb0/0x122 kthread+0x138/0x140 ret_from_fork+0x1f/0x40 Fix that by checking the lengths for correctness and trigger a warning to show that we have received wrong data. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 1e03acf30762d..b516fd1867ecf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -169,9 +169,9 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, } /* iwl_mvm_create_skb Adds the rxb to a new skb */ -static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, - u16 len, u8 crypt_len, - struct iwl_rx_cmd_buffer *rxb) +static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_hdr *hdr, u16 len, u8 crypt_len, + struct iwl_rx_cmd_buffer *rxb) { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_rx_mpdu_desc *desc = (void *)pkt->data; @@ -204,6 +204,20 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, * present before copying packet data. */ hdrlen += crypt_len; + + if (WARN_ONCE(headlen < hdrlen, + "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n", + hdrlen, len, crypt_len)) { + /* + * We warn and trace because we want to be able to see + * it in trace-cmd as well. 
+ */ + IWL_DEBUG_RX(mvm, + "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n", + hdrlen, len, crypt_len); + return -EINVAL; + } + skb_put_data(skb, hdr, hdrlen); skb_put_data(skb, (u8 *)hdr + hdrlen + pad_len, headlen - hdrlen); @@ -216,6 +230,8 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, skb_add_rx_frag(skb, 0, rxb_steal_page(rxb), offset, fraglen, rxb->truesize); } + + return 0; } static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, @@ -1671,7 +1687,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, rx_status->boottime_ns = ktime_get_boot_ns(); } - iwl_mvm_create_skb(skb, hdr, len, crypt_len, rxb); + if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) { + kfree_skb(skb); + goto out; + } + if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc)) iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, csi); -- GitLab From 5c9adef9789148d382d7d1307c3d6bfaf51d143d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 21 Apr 2019 17:58:11 +0300 Subject: [PATCH 1627/1861] iwlwifi: fix driver operation for 5350 We introduced a bug that prevented this old device from working. The driver would simply not be able to complete the INIT flow while spewing this warning: CSR addresses aren't configured WARNING: CPU: 0 PID: 819 at drivers/net/wireless/intel/iwlwifi/pcie/drv.c:917 iwl_pci_probe+0x160/0x1e0 [iwlwifi] Cc: stable@vger.kernel.org # v4.18+ Fixes: a8cbb46f831d ("iwlwifi: allow different csr flags for different device families") Signed-off-by: Emmanuel Grumbach Fixes: c8f1b51e506d ("iwlwifi: allow different csr flags for different device families") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/5000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c index 575a7022d045b..3846064d51a5a 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved. - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -136,6 +136,7 @@ const struct iwl_cfg iwl5350_agn_cfg = { .ht_params = &iwl5000_ht_params, .led_mode = IWL_LED_BLINK, .internal_wimax_coex = true, + .csr = &iwl_csr_v1, }; #define IWL_DEVICE_5150 \ -- GitLab From d156e67d3f58c5d3c7ebe1bec80657db534f32d4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 25 Apr 2019 10:03:34 +0300 Subject: [PATCH 1628/1861] iwlwifi: mvm: fix merge damage in iwl_mvm_vif_dbgfs_register() When I rebased Greg's patch, I accidentally left the old if block that was already there. Remove it. 
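As an aside on the iwl_mvm_create_skb() fix above: the reason a too-short MPDU is dangerous is that the copy length eventually lands in an unsigned parameter (skb_put_data() takes an unsigned length), so headlen - hdrlen wraps around to a huge value instead of going negative. A minimal stand-alone sketch of that wrap-around, with purely illustrative values (this is not driver code):

	#include <stdio.h>

	int main(void)
	{
		/* e.g. payload length vs. header + crypt length taken
		 * from a malformed rx descriptor */
		unsigned int headlen = 40;
		unsigned int hdrlen = 48;

		/* without the WARN_ONCE(headlen < hdrlen, ...) guard the
		 * subtraction wraps and becomes a huge copy length */
		unsigned int copy = headlen - hdrlen;

		printf("copy = %u\n", copy);
		return 0;
	}

Compiled and run, this prints copy = 4294967288, which matches the runaway memcpy_erms copy in the oops above.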
Fixes: 154d4899e411 ("iwlwifi: mvm: properly check debugfs dentry before using it") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 738eddb2e7acb..6925527d8457a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -780,12 +780,6 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) return; } - if (!mvmvif->dbgfs_dir) { - IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", - dbgfs_dir); - return; - } - if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) || (vif->type == NL80211_IFTYPE_STATION && vif->p2p))) -- GitLab From e5c812e84f0dece3400d5caf42522287e6ef139f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 28 Apr 2019 18:04:11 +0200 Subject: [PATCH 1629/1861] ALSA: line6: use dynamic buffers The line6 driver uses a lot of USB buffers off of the stack, which is not allowed on many systems, causing the driver to crash on some of them. Fix this up by dynamically allocating the buffers with kmalloc() which allows for proper DMA-able memory. Reported-by: Christo Gouws Reported-by: Alan Stern Tested-by: Christo Gouws Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 60 ++++++++++++++++++++++---------------- sound/usb/line6/podhd.c | 21 +++++++------ sound/usb/line6/toneport.c | 24 +++++++++++---- 3 files changed, 65 insertions(+), 40 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 7afe8fae49391..b61f65bed4e48 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -351,12 +351,16 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, { struct usb_device *usbdev = line6->usbdev; int ret; - unsigned char len; + unsigned char *len; unsigned count; if (address > 0xffff || datalen > 0xff) return -EINVAL; + len = kmalloc(sizeof(*len), GFP_KERNEL); + if (!len) + return -ENOMEM; + /* query the serial number: */ ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, @@ -365,7 +369,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, if (ret < 0) { dev_err(line6->ifcdev, "read request failed (error %d)\n", ret); - return ret; + goto exit; } /* Wait for data length. We'll get 0xff until length arrives. 
*/ @@ -375,28 +379,29 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0x0012, 0x0000, &len, 1, + 0x0012, 0x0000, len, 1, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(line6->ifcdev, "receive length failed (error %d)\n", ret); - return ret; + goto exit; } - if (len != 0xff) + if (*len != 0xff) break; } - if (len == 0xff) { + ret = -EIO; + if (*len == 0xff) { dev_err(line6->ifcdev, "read failed after %d retries\n", count); - return -EIO; - } else if (len != datalen) { + goto exit; + } else if (*len != datalen) { /* should be equal or something went wrong */ dev_err(line6->ifcdev, "length mismatch (expected %d, got %d)\n", - (int)datalen, (int)len); - return -EIO; + (int)datalen, (int)*len); + goto exit; } /* receive the result: */ @@ -405,12 +410,12 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, 0x0013, 0x0000, data, datalen, LINE6_TIMEOUT * HZ); - if (ret < 0) { + if (ret < 0) dev_err(line6->ifcdev, "read failed (error %d)\n", ret); - return ret; - } - return 0; +exit: + kfree(len); + return ret; } EXPORT_SYMBOL_GPL(line6_read_data); @@ -422,12 +427,16 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, { struct usb_device *usbdev = line6->usbdev; int ret; - unsigned char status; + unsigned char *status; int count; if (address > 0xffff || datalen > 0xffff) return -EINVAL; + status = kmalloc(sizeof(*status), GFP_KERNEL); + if (!status) + return -ENOMEM; + ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x0022, address, data, datalen, @@ -436,7 +445,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, if (ret < 0) { dev_err(line6->ifcdev, "write request failed (error %d)\n", ret); - return ret; + goto exit; } for (count = 0; count < LINE6_READ_WRITE_MAX_RETRIES; count++) { @@ -447,28 +456,29 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0012, 0x0000, - &status, 1, LINE6_TIMEOUT * HZ); + status, 1, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(line6->ifcdev, "receiving status failed (error %d)\n", ret); - return ret; + goto exit; } - if (status != 0xff) + if (*status != 0xff) break; } - if (status == 0xff) { + if (*status == 0xff) { dev_err(line6->ifcdev, "write failed after %d retries\n", count); - return -EIO; - } else if (status != 0) { + ret = -EIO; + } else if (*status != 0) { dev_err(line6->ifcdev, "write failed (error %d)\n", ret); - return -EIO; + ret = -EIO; } - - return 0; +exit: + kfree(status); + return ret; } EXPORT_SYMBOL_GPL(line6_write_data); diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 36ed9c85c0eb2..5f3c87264e667 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -225,28 +225,32 @@ static void podhd_startup_start_workqueue(struct timer_list *t) static int podhd_dev_start(struct usb_line6_podhd *pod) { int ret; - u8 init_bytes[8]; + u8 *init_bytes; int i; struct usb_device *usbdev = pod->line6.usbdev; + init_bytes = kmalloc(8, GFP_KERNEL); + if (!init_bytes) + return -ENOMEM; + ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x11, 0, NULL, 0, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret); - return ret; + goto exit; } /* NOTE: looks like some 
kind of ping message */ ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x11, 0x0, - &init_bytes, 3, LINE6_TIMEOUT * HZ); + init_bytes, 3, LINE6_TIMEOUT * HZ); if (ret < 0) { dev_err(pod->line6.ifcdev, "receive length failed (error %d)\n", ret); - return ret; + goto exit; } pod->firmware_version = @@ -255,7 +259,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) for (i = 0; i <= 16; i++) { ret = line6_read_data(&pod->line6, 0xf000 + 0x08 * i, init_bytes, 8); if (ret < 0) - return ret; + goto exit; } ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), @@ -263,10 +267,9 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT, 1, 0, NULL, 0, LINE6_TIMEOUT * HZ); - if (ret < 0) - return ret; - - return 0; +exit: + kfree(init_bytes); + return ret; } static void podhd_startup_workqueue(struct work_struct *work) diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c index f47ba94e6f4a1..19bee725de00d 100644 --- a/sound/usb/line6/toneport.c +++ b/sound/usb/line6/toneport.c @@ -365,16 +365,21 @@ static bool toneport_has_source_select(struct usb_line6_toneport *toneport) /* Setup Toneport device. */ -static void toneport_setup(struct usb_line6_toneport *toneport) +static int toneport_setup(struct usb_line6_toneport *toneport) { - u32 ticks; + u32 *ticks; struct usb_line6 *line6 = &toneport->line6; struct usb_device *usbdev = line6->usbdev; + ticks = kmalloc(sizeof(*ticks), GFP_KERNEL); + if (!ticks) + return -ENOMEM; + /* sync time on device with host: */ /* note: 32-bit timestamps overflow in year 2106 */ - ticks = (u32)ktime_get_real_seconds(); - line6_write_data(line6, 0x80c6, &ticks, 4); + *ticks = (u32)ktime_get_real_seconds(); + line6_write_data(line6, 0x80c6, ticks, 4); + kfree(ticks); /* enable device: */ toneport_send_cmd(usbdev, 0x0301, 0x0000); @@ -389,6 +394,7 @@ static void toneport_setup(struct usb_line6_toneport *toneport) toneport_update_led(toneport); mod_timer(&toneport->timer, jiffies + TONEPORT_PCM_DELAY * HZ); + return 0; } /* @@ -451,7 +457,9 @@ static int toneport_init(struct usb_line6 *line6, return err; } - toneport_setup(toneport); + err = toneport_setup(toneport); + if (err) + return err; /* register audio system: */ return snd_card_register(line6->card); @@ -463,7 +471,11 @@ static int toneport_init(struct usb_line6 *line6, */ static int toneport_reset_resume(struct usb_interface *interface) { - toneport_setup(usb_get_intfdata(interface)); + int err; + + err = toneport_setup(usb_get_intfdata(interface)); + if (err) + return err; return line6_resume(interface); } #endif -- GitLab From b1da6a51871c6929dced1a7fad81990988b36ed6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 24 Apr 2019 18:39:57 +0200 Subject: [PATCH 1630/1861] fsnotify: Fix NULL ptr deref in fanotify_get_fsid() fanotify_get_fsid() is reading mark->connector->fsid under srcu. It can happen that it sees a mark that is not fully initialized or a mark that has already been detached from the object list. In these cases mark->connector can be NULL, leading to a NULL pointer dereference. Fix the problem by being careful when reading mark->connector and checking it for NULL. Also use WRITE_ONCE when writing mark->connector, just to prevent the compiler from doing something stupid.
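The fix below boils down to the classic publish/observe pattern for a pointer that can be attached and detached concurrently. A minimal sketch of the reader side, assuming the kernel's READ_ONCE()/WRITE_ONCE() from <linux/compiler.h> (the names follow the patch, but this function is an editor's sketch, not the actual fsnotify code):

	/* the writer side publishes and clears with WRITE_ONCE(), see the
	 * mark.c hunks below */
	static __kernel_fsid_t get_fsid_safely(struct fsnotify_mark *mark)
	{
		struct fsnotify_mark_connector *conn;
		__kernel_fsid_t fsid = {};

		conn = READ_ONCE(mark->connector);	/* snapshot exactly once */
		if (!conn)
			return fsid;	/* mark being created or destroyed */
		return conn->fsid;	/* use the snapshot, never re-read */
	}

Without the READ_ONCE()/WRITE_ONCE() pair, the compiler is free to reload mark->connector between the NULL check and the dereference, which is exactly the window the syzbot report hit.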
Reported-by: syzbot+15927486a4f1bfcbaf91@syzkaller.appspotmail.com Fixes: 77115225acc6 ("fanotify: cache fsid in fsnotify_mark_connector") Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 14 ++++++++++++-- fs/notify/mark.c | 12 ++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6b9c275489971..63c6bb1f8c4da 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -346,10 +346,16 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) __kernel_fsid_t fsid = {}; fsnotify_foreach_obj_type(type) { + struct fsnotify_mark_connector *conn; + if (!fsnotify_iter_should_report_type(iter_info, type)) continue; - fsid = iter_info->marks[type]->connector->fsid; + conn = READ_ONCE(iter_info->marks[type]->connector); + /* Mark is just getting destroyed or created? */ + if (!conn) + continue; + fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; return fsid; @@ -408,8 +414,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { fsid = fanotify_get_fsid(iter_info); + /* Racing with mark destruction or creation? */ + if (!fsid.val[0] && !fsid.val[1]) + return 0; + } event = fanotify_alloc_event(group, inode, mask, data, data_type, &fsid); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d593d42695618..22acb0a79b532 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -239,13 +239,13 @@ static void fsnotify_drop_object(unsigned int type, void *objp) void fsnotify_put_mark(struct fsnotify_mark *mark) { - struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector); void *objp = NULL; unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED; bool free_conn = false; /* Catch marks that were actually never attached to object */ - if (!mark->connector) { + if (!conn) { if (refcount_dec_and_test(&mark->refcnt)) fsnotify_final_mark_destroy(mark); return; @@ -255,10 +255,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * We have to be careful so that traversals of obj_list under lock can * safely grab mark reference. */ - if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock)) return; - conn = mark->connector; hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { objp = fsnotify_detach_connector_from_object(conn, &type); @@ -266,7 +265,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) } else { __fsnotify_recalc_mask(conn); } - mark->connector = NULL; + WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); fsnotify_drop_object(type, objp); @@ -620,7 +619,7 @@ restart: /* mark should be the last entry. 
last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: - mark->connector = conn; + WRITE_ONCE(mark->connector, conn); out_err: spin_unlock(&conn->lock); spin_unlock(&mark->lock); @@ -808,6 +807,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, refcount_set(&mark->refcnt, 1); fsnotify_get_group(group); mark->group = group; + WRITE_ONCE(mark->connector, NULL); } /* -- GitLab From 37624b58542fb9f2d9a70e6ea006ef8a5f66c30b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Apr 2019 17:04:13 -0700 Subject: [PATCH 1631/1861] Linux 5.1-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index abe13538a8c04..2b99679148dc7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shy Crocodile # *DOCUMENTATION* -- GitLab From ee948837d7fa89127373c139766aacf6b02a9225 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Apr 2019 21:34:21 -0400 Subject: [PATCH 1632/1861] [fix] get rid of checking for absent device name in vfs_get_tree() It has no business being there, it's checked by the relevant ->get_tree() as it is *and* it returns the wrong error for no reason whatsoever. Fixes: f3a09c92018a "introduce fs_context methods" Signed-off-by: Al Viro --- fs/super.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/super.c b/fs/super.c index 583a0124bc394..2739f57515f81 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1467,11 +1467,6 @@ int vfs_get_tree(struct fs_context *fc) struct super_block *sb; int error; - if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) { - errorf(fc, "Filesystem requires source device"); - return -ENOENT; - } - if (fc->root) return -EBUSY; -- GitLab From f69e749a49353d96af1a293f56b5b56de59c668a Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Fri, 14 Dec 2018 11:55:52 +0100 Subject: [PATCH 1633/1861] Abort file_remove_privs() for non-reg. files file_remove_privs() might be called for non-regular files, e.g. a blkdev inode. There is no reason to do its job on things like blkdev inodes, pipes, or cdevs. Hence, abort if the file does not refer to a regular inode. AV: more to the point, for devices there might be any number of inodes referring to a given device. Which one to strip the permissions from, even if that made any sense in the first place? All of them will be observed with contents modified, after all. Found by LockDoc (Alexander Lochmann, Horst Schirmeier and Olaf Spinczyk) Reviewed-by: Jan Kara Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Signed-off-by: Al Viro --- fs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index e9d97add2b36c..9a453f3637f85 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1817,8 +1817,13 @@ int file_remove_privs(struct file *file) int kill; int error = 0; - /* Fast path for nothing security related */ - if (IS_NOSEC(inode)) + /* + * Fast path for nothing security related. + * As well for non-regular files, e.g. blkdev inodes. + * For example, blkdev_write_iter() might get here + * trying to remove privs which it is not allowed to.
+ */ + if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; kill = dentry_needs_remove_privs(dentry); -- GitLab From d15d356887e770c5f2dcf963b52c7cb510c9e42d Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 23 Apr 2019 00:26:52 +0800 Subject: [PATCH 1634/1861] perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER Currently perf callchains don't work well with the ORC unwinder when sampling from a trace point. We'll get a useless in-kernel callchain like this: perf 6429 [000] 22.498450: kmem:mm_page_alloc: page=0x176a17 pfn=1534487 order=0 migratetype=0 gfp_flags=GFP_KERNEL ffffffffbe23e32e __alloc_pages_nodemask+0x22e (/lib/modules/5.1.0-rc3+/build/vmlinux) 7efdf7f7d3e8 __poll+0x18 (/usr/lib64/libc-2.28.so) 5651468729c1 [unknown] (/usr/bin/perf) 5651467ee82a main+0x69a (/usr/bin/perf) 7efdf7eaf413 __libc_start_main+0xf3 (/usr/lib64/libc-2.28.so) 5541f689495641d7 [unknown] ([unknown]) The root cause is that, for trace point events, perf doesn't get a real snapshot of the hardware registers. Instead, it tries to fetch the required caller's registers and composes a fake register snapshot which is supposed to contain enough information to start unwinding. However, without CONFIG_FRAME_POINTER, it fails to get the caller's BP as the frame pointer, so the current frame pointer is returned instead. We get an invalid register combination which confuses the unwinder and ends the stacktrace early. So in such a case just don't try to dump BP, and let the unwinder start directly when the registers are not a real snapshot. Use SP as the skip mark; the unwinder will skip all the frames until it meets the frame of the trace point caller. Tested with both the frame pointer unwinder and the ORC unwinder; this makes perf callchain get the full kernel-space stacktrace again, like this: perf 6503 [000] 1567.570191: kmem:mm_page_alloc: page=0x16c904 pfn=1493252 order=0 migratetype=0 gfp_flags=GFP_KERNEL ffffffffb523e2ae __alloc_pages_nodemask+0x22e (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb52383bd __get_free_pages+0xd (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb52fd28a __pollwait+0x8a (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb521426f perf_poll+0x2f (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb52fe3e2 do_sys_poll+0x252 (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb52ff027 __x64_sys_poll+0x37 (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb500418b do_syscall_64+0x5b (/lib/modules/5.1.0-rc3+/build/vmlinux) ffffffffb5a0008c entry_SYSCALL_64_after_hwframe+0x44 (/lib/modules/5.1.0-rc3+/build/vmlinux) 7f71e92d03e8 __poll+0x18 (/usr/lib64/libc-2.28.so) 55a22960d9c1 [unknown] (/usr/bin/perf) 55a22958982a main+0x69a (/usr/bin/perf) 7f71e9202413 __libc_start_main+0xf3 (/usr/lib64/libc-2.28.so) 5541f689495641d7 [unknown] ([unknown]) Co-developed-by: Josh Poimboeuf Signed-off-by: Kairui Song Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Young Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190422162652.15483-1-kasong@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 21 +++++++++++++++++---- arch/x86/include/asm/perf_event.h | 7 +------ arch/x86/include/asm/stacktrace.h | 13 ------------- include/linux/perf_event.h | 14 ++++++++++---- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index de1a924a49145..f315425d8468f 100644 --- a/arch/x86/events/core.c +++ 
b/arch/x86/events/core.c @@ -2382,6 +2382,15 @@ void arch_perf_update_userpage(struct perf_event *event, cyc2ns_read_end(); } +/* + * Determine whether the regs were taken from an irq/exception handler rather + * than from perf_arch_fetch_caller_regs(). + */ +static bool perf_hw_regs(struct pt_regs *regs) +{ + return regs->flags & X86_EFLAGS_FIXED; +} + void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { @@ -2393,11 +2402,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_hw_regs(regs)) { + if (perf_callchain_store(entry, regs->ip)) + return; + unwind_start(&state, current, regs, NULL); + } else { + unwind_start(&state, current, NULL, (void *)regs->sp); + } - for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); - unwind_next_frame(&state)) { + for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || perf_callchain_store(entry, addr)) return; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 04768a3a54548..1392d5e6e8d67 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -308,14 +308,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); */ #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ip = (__ip); \ - (regs)->bp = caller_frame_pointer(); \ + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ (regs)->cs = __KERNEL_CS; \ regs->flags = 0; \ - asm volatile( \ - _ASM_MOV "%%"_ASM_SP ", %0\n" \ - : "=m" ((regs)->sp) \ - :: "memory" \ - ); \ } struct perf_guest_switch_msr { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a47..beef7ad9e43a4 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -98,19 +98,6 @@ struct stack_frame_ia32 { u32 return_address; }; -static inline unsigned long caller_frame_pointer(void) -{ - struct stack_frame *frame; - - frame = __builtin_frame_address(0); - -#ifdef CONFIG_FRAME_POINTER - frame = frame->next_frame; -#endif - - return (unsigned long)frame; -} - void show_opcodes(struct pt_regs *regs, const char *loglvl); void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f3864e1c55695..cf023db0e8a2f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1058,12 +1058,18 @@ static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned lo #endif /* - * Take a snapshot of the regs. Skip ip and frame pointer to - * the nth caller. We only need a few of the regs: + * When generating a perf sample in-line, instead of from an interrupt / + * exception, we lack a pt_regs. This is typically used from software events + * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints. + * + * We typically don't need a full set, but (for x86) do require: * - ip for PERF_SAMPLE_IP * - cs for user_mode() tests - * - bp for callchains - * - eflags, for future purposes, just in case + * - sp for PERF_SAMPLE_CALLCHAIN + * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs()) + * + * NOTE: assumes @regs is otherwise already 0 filled; this is important for + * things like PERF_SAMPLE_REGS_INTR. 
*/ static inline void perf_fetch_caller_regs(struct pt_regs *regs) { -- GitLab From 9b019acb72e4b5741d88e8936d6f200ed44b66b2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 12 Apr 2019 14:26:13 +1000 Subject: [PATCH 1635/1861] sched/nohz: Run NOHZ idle load balancer on HK_FLAG_MISC CPUs The NOHZ idle balancer runs on the lowest idle CPU. This can interfere with isolated CPUs, so confine it to HK_FLAG_MISC housekeeping CPUs. HK_FLAG_SCHED is not used for this because it is not set anywhere at the moment. This could be folded into HK_FLAG_SCHED once that option is fixed. The problem was observed with increased jitter on an application running on CPU0, caused by NOHZ idle load balancing being run on CPU1 (an SMT sibling). Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190412042613.28930-1-npiggin@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 13bafe350abf9..7b0da7007da33 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9519,22 +9519,26 @@ static inline int on_null_domain(struct rq *rq) * - When one of the busy CPUs notice that there may be an idle rebalancing * needed, they will kick the idle load balancer, which then does idle * load balancing for all the idle CPUs. + * - HK_FLAG_MISC CPUs are used for this task, because HK_FLAG_SCHED not set + * anywhere yet. */ static inline int find_new_ilb(void) { - int ilb = cpumask_first(nohz.idle_cpus_mask); + int ilb; - if (ilb < nr_cpu_ids && idle_cpu(ilb)) - return ilb; + for_each_cpu_and(ilb, nohz.idle_cpus_mask, + housekeeping_cpumask(HK_FLAG_MISC)) { + if (idle_cpu(ilb)) + return ilb; + } return nr_cpu_ids; } /* - * Kick a CPU to do the nohz balancing, if it is time for it. We pick the - * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle - * CPU (if there is one). + * Kick a CPU to do the nohz balancing, if it is time for it. We pick any + * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one). */ static void kick_ilb(unsigned int flags) { -- GitLab From 948f83768a180ec8e85c4a8ff269d5e433d10815 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 2 Apr 2019 18:02:44 +0200 Subject: [PATCH 1636/1861] locking/lockdep: Test all incompatible scenarios at once in check_irq_usage() check_prev_add_irq() tests all incompatible scenarios one after the other while adding a lock (@next) to a tree dependency (@prev): LOCK_USED_IN_HARDIRQ vs LOCK_ENABLED_HARDIRQ LOCK_USED_IN_HARDIRQ_READ vs LOCK_ENABLED_HARDIRQ LOCK_USED_IN_SOFTIRQ vs LOCK_ENABLED_SOFTIRQ LOCK_USED_IN_SOFTIRQ_READ vs LOCK_ENABLED_SOFTIRQ Also for these four scenarios, we must at least iterate the @prev backward dependency. Then if it matches the relevant LOCK_USED_* bit, we must also iterate the @next forward dependency. Therefore in the best case we iterate 4 times, in the worst case 8 times. A different approach can let us divide the number of branch iterations by 4: 1) Iterate through @prev backward dependencies and accumulate all the IRQ uses in a single mask. In the best case where the current lock hasn't been used in IRQ, we stop here. 2) Iterate through @next forward dependencies and try to find a lock whose usage is exclusive to the accumulated usages gathered in the previous step. 
If we find one (call it @lockA), we have found an incompatible use, otherwise we stop here. Only bad locking scenario go further. So a sane verification stop here. 3) Iterate again through @prev backward dependency and find the lock whose usage matches @lockA in term of incompatibility. Call that lock @lockB. 4) Report the incompatible usages of @lockA and @lockB If no incompatible use is found, the verification never goes beyond step 2 which means at most two iterations. The following compares the execution measurements of the function check_prev_add_irq(): Number of calls | Avg (ns) | Stdev (ns) | Total time (ns) ------------------------------------------------------------------------ Mainline 8452 | 2652 | 11962 | 22415143 This patch 8452 | 1518 | 7090 | 12835602 Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/20190402160244.32434-5-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 228 ++++++++++++++++++++--------- kernel/locking/lockdep_internals.h | 6 + 2 files changed, 167 insertions(+), 67 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 5e149dd782989..25ecc6d3058b1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1676,6 +1676,14 @@ check_redundant(struct lock_list *root, struct lock_class *target, } #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) + +static inline int usage_accumulate(struct lock_list *entry, void *mask) +{ + *(unsigned long *)mask |= entry->class->usage_mask; + + return 0; +} + /* * Forwards and backwards subgraph searching, for the purposes of * proving that two subgraphs can be connected by a new dependency @@ -1687,8 +1695,6 @@ static inline int usage_match(struct lock_list *entry, void *mask) return entry->class->usage_mask & *(unsigned long *)mask; } - - /* * Find a node in the forwards-direction dependency sub-graph starting * at @root->class that matches @bit. 
@@ -1922,39 +1928,6 @@ print_bad_irq_dependency(struct task_struct *curr, return 0; } -static int -check_usage(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, enum lock_usage_bit bit_backwards, - enum lock_usage_bit bit_forwards, const char *irqclass) -{ - int ret; - struct lock_list this, that; - struct lock_list *uninitialized_var(target_entry); - struct lock_list *uninitialized_var(target_entry1); - - this.parent = NULL; - - this.class = hlock_class(prev); - ret = find_usage_backwards(&this, lock_flag(bit_backwards), &target_entry); - if (ret < 0) - return print_bfs_bug(ret); - if (ret == 1) - return ret; - - that.parent = NULL; - that.class = hlock_class(next); - ret = find_usage_forwards(&that, lock_flag(bit_forwards), &target_entry1); - if (ret < 0) - return print_bfs_bug(ret); - if (ret == 1) - return ret; - - return print_bad_irq_dependency(curr, &this, &that, - target_entry, target_entry1, - prev, next, - bit_backwards, bit_forwards, irqclass); -} - static const char *state_names[] = { #define LOCKDEP_STATE(__STATE) \ __stringify(__STATE), @@ -1977,6 +1950,13 @@ static inline const char *state_name(enum lock_usage_bit bit) return state_names[bit >> LOCK_USAGE_DIR_MASK]; } +/* + * The bit number is encoded like: + * + * bit0: 0 exclusive, 1 read lock + * bit1: 0 used in irq, 1 irq enabled + * bit2-n: state + */ static int exclusive_bit(int new_bit) { int state = new_bit & LOCK_USAGE_STATE_MASK; @@ -1988,45 +1968,160 @@ static int exclusive_bit(int new_bit) return state | (dir ^ LOCK_USAGE_DIR_MASK); } +/* + * Observe that when given a bitmask where each bitnr is encoded as above, a + * right shift of the mask transforms the individual bitnrs as -1 and + * conversely, a left shift transforms into +1 for the individual bitnrs. + * + * So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can + * create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0) + * instead by subtracting the bit number by 2, or shifting the mask right by 2. + * + * Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2. + * + * So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is + * all bits set) and recompose with bitnr1 flipped. + */ +static unsigned long invert_dir_mask(unsigned long mask) +{ + unsigned long excl = 0; + + /* Invert dir */ + excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK; + excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK; + + return excl; +} + +/* + * As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all + * bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*). + * And then mask out all bitnr0. + */ +static unsigned long exclusive_mask(unsigned long mask) +{ + unsigned long excl = invert_dir_mask(mask); + + /* Strip read */ + excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK; + excl &= ~LOCKF_IRQ_READ; + + return excl; +} + +/* + * Retrieve the _possible_ original mask to which @mask is + * exclusive. Ie: this is the opposite of exclusive_mask(). + * Note that 2 possible original bits can match an exclusive + * bit: one has LOCK_USAGE_READ_MASK set, the other has it + * cleared. So both are returned for each exclusive bit. 
+ */ +static unsigned long original_mask(unsigned long mask) +{ + unsigned long excl = invert_dir_mask(mask); + + /* Include read in existing usages */ + excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK; + + return excl; +} + +/* + * Find the first pair of bit match between an original + * usage mask and an exclusive usage mask. + */ +static int find_exclusive_match(unsigned long mask, + unsigned long excl_mask, + enum lock_usage_bit *bitp, + enum lock_usage_bit *excl_bitp) +{ + int bit, excl; + + for_each_set_bit(bit, &mask, LOCK_USED) { + excl = exclusive_bit(bit); + if (excl_mask & lock_flag(excl)) { + *bitp = bit; + *excl_bitp = excl; + return 0; + } + } + return -1; +} + +/* + * Prove that the new dependency does not connect a hardirq-safe(-read) + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, enum lock_usage_bit bit) + struct held_lock *next) { + unsigned long usage_mask = 0, forward_mask, backward_mask; + enum lock_usage_bit forward_bit = 0, backward_bit = 0; + struct lock_list *uninitialized_var(target_entry1); + struct lock_list *uninitialized_var(target_entry); + struct lock_list this, that; + int ret; + /* - * Prove that the new dependency does not connect a hardirq-safe - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at , and the - * forwards-subgraph starting at : + * Step 1: gather all hard/soft IRQs usages backward in an + * accumulated usage mask. */ - if (!check_usage(curr, prev, next, bit, - exclusive_bit(bit), state_name(bit))) - return 0; + this.parent = NULL; + this.class = hlock_class(prev); - bit++; /* _READ */ + ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); + if (ret < 0) + return print_bfs_bug(ret); + + usage_mask &= LOCKF_USED_IN_IRQ_ALL; + if (!usage_mask) + return 1; /* - * Prove that the new dependency does not connect a hardirq-safe-read - * lock with a hardirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at , and the - * forwards-subgraph starting at : + * Step 2: find exclusive uses forward that match the previous + * backward accumulated mask. */ - if (!check_usage(curr, prev, next, bit, - exclusive_bit(bit), state_name(bit))) - return 0; + forward_mask = exclusive_mask(usage_mask); - return 1; -} + that.parent = NULL; + that.class = hlock_class(next); -static int -check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) -{ -#define LOCKDEP_STATE(__STATE) \ - if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ - return 0; -#include "lockdep_states.h" -#undef LOCKDEP_STATE + ret = find_usage_forwards(&that, forward_mask, &target_entry1); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; - return 1; + /* + * Step 3: we found a bad match! Now retrieve a lock from the backward + * list whose usage mask matches the exclusive usage mask from the + * lock found on the forward list. + */ + backward_mask = original_mask(target_entry1->class->usage_mask); + + ret = find_usage_backwards(&this, backward_mask, &target_entry); + if (ret < 0) + return print_bfs_bug(ret); + if (DEBUG_LOCKS_WARN_ON(ret == 1)) + return 1; + + /* + * Step 4: narrow down to a pair of incompatible usage bits + * and report it. 
+ */ + ret = find_exclusive_match(target_entry->class->usage_mask, + target_entry1->class->usage_mask, + &backward_bit, &forward_bit); + if (DEBUG_LOCKS_WARN_ON(ret == -1)) + return 1; + + return print_bad_irq_dependency(curr, &this, &that, + target_entry, target_entry1, + prev, next, + backward_bit, forward_bit, + state_name(backward_bit)); } static void inc_chains(void) @@ -2043,9 +2138,8 @@ static void inc_chains(void) #else -static inline int -check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) +static inline int check_irq_usage(struct task_struct *curr, + struct held_lock *prev, struct held_lock *next) { return 1; } @@ -2225,7 +2319,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, else if (unlikely(ret < 0)) return print_bfs_bug(ret); - if (!check_prev_add_irq(curr, prev, next)) + if (!check_irq_usage(curr, prev, next)) return 0; /* diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 2b3ffd4117ad2..150ec3f0c5b5d 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -66,6 +66,12 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = 0; #undef LOCKDEP_STATE +#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ) +#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ) + +#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ) +#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ) + /* * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text, * .data and .bss to fit in required 32MB limit for the kernel. With -- GitLab From ad282a8117d5048398f506f20b092c14b3b3c43f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Mar 2019 17:08:52 -0700 Subject: [PATCH 1637/1861] locking/static_key: Add support for deferred static branches Add deferred static branches. We can't unfortunately use the nice trick of encapsulating the entire structure in true/false variants, because the inside has to be either struct static_key_true or struct static_key_false. Use defines to pass the appropriate members to the helpers separately. Signed-off-by: Jakub Kicinski Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Simon Horman Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: alexei.starovoitov@gmail.com Cc: ard.biesheuvel@linaro.org Cc: oss-drivers@netronome.com Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190330000854.30142-2-jakub.kicinski@netronome.com Signed-off-by: Ingo Molnar --- include/linux/jump_label_ratelimit.h | 64 ++++++++++++++++++++++++++-- kernel/jump_label.c | 17 +++++--- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index a49f2b45b3f0f..42710d5949ba3 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -12,21 +12,79 @@ struct static_key_deferred { struct delayed_work work; }; -extern void static_key_slow_dec_deferred(struct static_key_deferred *key); -extern void static_key_deferred_flush(struct static_key_deferred *key); +struct static_key_true_deferred { + struct static_key_true key; + unsigned long timeout; + struct delayed_work work; +}; + +struct static_key_false_deferred { + struct static_key_false key; + unsigned long timeout; + struct delayed_work work; +}; + +#define static_key_slow_dec_deferred(x) \ + __static_key_slow_dec_deferred(&(x)->key, &(x)->work, (x)->timeout) +#define static_branch_slow_dec_deferred(x) \ + __static_key_slow_dec_deferred(&(x)->key.key, &(x)->work, (x)->timeout) + +#define static_key_deferred_flush(x) \ + __static_key_deferred_flush((x), &(x)->work) + +extern void +__static_key_slow_dec_deferred(struct static_key *key, + struct delayed_work *work, + unsigned long timeout); +extern void __static_key_deferred_flush(void *key, struct delayed_work *work); extern void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); +extern void jump_label_update_timeout(struct work_struct *work); + +#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \ + struct static_key_true_deferred name = { \ + .key = { STATIC_KEY_INIT_TRUE }, \ + .timeout = (rl), \ + .work = __DELAYED_WORK_INITIALIZER((name).work, \ + jump_label_update_timeout, \ + 0), \ + } + +#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \ + struct static_key_false_deferred name = { \ + .key = { STATIC_KEY_INIT_FALSE }, \ + .timeout = (rl), \ + .work = __DELAYED_WORK_INITIALIZER((name).work, \ + jump_label_update_timeout, \ + 0), \ + } + +#define static_branch_deferred_inc(x) static_branch_inc(&(x)->key) + #else /* !CONFIG_JUMP_LABEL */ struct static_key_deferred { struct static_key key; }; +struct static_key_true_deferred { + struct static_key_true key; +}; +struct static_key_false_deferred { + struct static_key_false key; +}; +#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \ + struct static_key_true_deferred name = { STATIC_KEY_TRUE_INIT } +#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \ + struct static_key_false_deferred name = { STATIC_KEY_FALSE_INIT } + +#define static_branch_slow_dec_deferred(x) static_branch_dec(&(x)->key) + static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { STATIC_KEY_CHECK_USE(key); static_key_slow_dec(&key->key); } -static inline void static_key_deferred_flush(struct static_key_deferred *key) +static inline void static_key_deferred_flush(void *key) { STATIC_KEY_CHECK_USE(key); } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a799b1ac6b2fe..73bbbaddbd9c0 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -244,12 +244,13 @@ static void __static_key_slow_dec(struct static_key *key, cpus_read_unlock(); } -static void 
jump_label_update_timeout(struct work_struct *work) +void jump_label_update_timeout(struct work_struct *work) { struct static_key_deferred *key = container_of(work, struct static_key_deferred, work.work); __static_key_slow_dec(&key->key, 0, NULL); } +EXPORT_SYMBOL_GPL(jump_label_update_timeout); void static_key_slow_dec(struct static_key *key) { @@ -264,19 +265,21 @@ void static_key_slow_dec_cpuslocked(struct static_key *key) __static_key_slow_dec_cpuslocked(key, 0, NULL); } -void static_key_slow_dec_deferred(struct static_key_deferred *key) +void __static_key_slow_dec_deferred(struct static_key *key, + struct delayed_work *work, + unsigned long timeout) { STATIC_KEY_CHECK_USE(key); - __static_key_slow_dec(&key->key, key->timeout, &key->work); + __static_key_slow_dec(key, timeout, work); } -EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); +EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred); -void static_key_deferred_flush(struct static_key_deferred *key) +void __static_key_deferred_flush(void *key, struct delayed_work *work) { STATIC_KEY_CHECK_USE(key); - flush_delayed_work(&key->work); + flush_delayed_work(work); } -EXPORT_SYMBOL_GPL(static_key_deferred_flush); +EXPORT_SYMBOL_GPL(__static_key_deferred_flush); void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) -- GitLab From b92e793bbe4a1c49dbf78d8d526561e7a7dd568a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Mar 2019 17:08:53 -0700 Subject: [PATCH 1638/1861] locking/static_key: Factor out the fast path of static_key_slow_dec() static_key_slow_dec() checks if the atomic enable count is larger than 1, and if so there decrements it before taking the jump_label_lock. Move this logic into a helper for reuse in rate limitted keys. Signed-off-by: Jakub Kicinski Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Simon Horman Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: alexei.starovoitov@gmail.com Cc: ard.biesheuvel@linaro.org Cc: oss-drivers@netronome.com Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190330000854.30142-3-jakub.kicinski@netronome.com Signed-off-by: Ingo Molnar --- kernel/jump_label.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 73bbbaddbd9c0..02c3d11264dd2 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -202,13 +202,13 @@ void static_key_disable(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_disable); -static void __static_key_slow_dec_cpuslocked(struct static_key *key, - unsigned long rate_limit, - struct delayed_work *work) +static bool static_key_slow_try_dec(struct static_key *key) { int val; - lockdep_assert_cpus_held(); + val = atomic_fetch_add_unless(&key->enabled, -1, 1); + if (val == 1) + return false; /* * The negative count check is valid even when a negative @@ -217,11 +217,18 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key, * returns is unbalanced, because all other static_key_slow_inc() * instances block while the update is in progress. 
*/ - val = atomic_fetch_add_unless(&key->enabled, -1, 1); - if (val != 1) { - WARN(val < 0, "jump label: negative count!\n"); + WARN(val < 0, "jump label: negative count!\n"); + return true; +} + +static void __static_key_slow_dec_cpuslocked(struct static_key *key, + unsigned long rate_limit, + struct delayed_work *work) +{ + lockdep_assert_cpus_held(); + + if (static_key_slow_try_dec(key)) return; - } jump_label_lock(); if (atomic_dec_and_test(&key->enabled)) { -- GitLab From 94b5f312cfb4a66055d9b688dc9ab6b297eb9dcc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Mar 2019 17:08:54 -0700 Subject: [PATCH 1639/1861] locking/static_key: Don't take sleeping locks in __static_key_slow_dec_deferred() Changing jump_label state is protected by jump_label_lock(). Rate limited static_key_slow_dec(), however, will never directly call jump_label_update(), it will schedule a delayed work instead. Therefore it's unnecessary to take both the cpus_read_lock() and jump_label_lock(). This allows static_key_slow_dec_deferred() to be called from atomic contexts, like socket destructing in net/tls, without the need for another indirection. Signed-off-by: Jakub Kicinski Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Simon Horman Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: alexei.starovoitov@gmail.com Cc: ard.biesheuvel@linaro.org Cc: oss-drivers@netronome.com Cc: yamada.masahiro@socionext.com Link: https://lkml.kernel.org/r/20190330000854.30142-4-jakub.kicinski@netronome.com Signed-off-by: Ingo Molnar --- kernel/jump_label.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 02c3d11264dd2..de6efdecc70d0 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -221,9 +221,7 @@ static bool static_key_slow_try_dec(struct static_key *key) return true; } -static void __static_key_slow_dec_cpuslocked(struct static_key *key, - unsigned long rate_limit, - struct delayed_work *work) +static void __static_key_slow_dec_cpuslocked(struct static_key *key) { lockdep_assert_cpus_held(); @@ -231,23 +229,15 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key, return; jump_label_lock(); - if (atomic_dec_and_test(&key->enabled)) { - if (rate_limit) { - atomic_inc(&key->enabled); - schedule_delayed_work(work, rate_limit); - } else { - jump_label_update(key); - } - } + if (atomic_dec_and_test(&key->enabled)) + jump_label_update(key); jump_label_unlock(); } -static void __static_key_slow_dec(struct static_key *key, - unsigned long rate_limit, - struct delayed_work *work) +static void __static_key_slow_dec(struct static_key *key) { cpus_read_lock(); - __static_key_slow_dec_cpuslocked(key, rate_limit, work); + __static_key_slow_dec_cpuslocked(key); cpus_read_unlock(); } @@ -255,21 +245,21 @@ void jump_label_update_timeout(struct work_struct *work) { struct static_key_deferred *key = container_of(work, struct static_key_deferred, work.work); - __static_key_slow_dec(&key->key, 0, NULL); + __static_key_slow_dec(&key->key); } EXPORT_SYMBOL_GPL(jump_label_update_timeout); void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); - __static_key_slow_dec(key, 0, NULL); + __static_key_slow_dec(key); } EXPORT_SYMBOL_GPL(static_key_slow_dec); void static_key_slow_dec_cpuslocked(struct static_key *key) { STATIC_KEY_CHECK_USE(key); - __static_key_slow_dec_cpuslocked(key, 0, NULL); + 
__static_key_slow_dec_cpuslocked(key); } void __static_key_slow_dec_deferred(struct static_key *key, @@ -277,7 +267,11 @@ void __static_key_slow_dec_deferred(struct static_key *key, unsigned long timeout) { STATIC_KEY_CHECK_USE(key); - __static_key_slow_dec(key, timeout, work); + + if (static_key_slow_try_dec(key)) + return; + + schedule_delayed_work(work, timeout); } EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred); -- GitLab From 712e9ad0a2bd03e6685e9154985129d3f90efb6f Mon Sep 17 00:00:00 2001 From: Vabhav Sharma Date: Fri, 26 Apr 2019 06:55:55 +0000 Subject: [PATCH 1640/1861] cpufreq: qoriq: add support for lx2160a Enable support for the NXP lx2160a SoC in the qoriq cpufreq driver. Signed-off-by: Tang Yuantian Signed-off-by: Yogesh Gaur Signed-off-by: Vabhav Sharma Acked-by: Scott Wood Acked-by: Stephen Boyd Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qoriq-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index d308c4de467d1..71b640c8c1a50 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -285,6 +285,7 @@ static const struct of_device_id node_matches[] __initconst = { { .compatible = "fsl,ls1046a-clockgen", }, { .compatible = "fsl,ls1088a-clockgen", }, { .compatible = "fsl,ls2080a-clockgen", }, + { .compatible = "fsl,lx2160a-clockgen", }, { .compatible = "fsl,p4080-clockgen", }, { .compatible = "fsl,qoriq-clockgen-1.0", }, { .compatible = "fsl,qoriq-clockgen-2.0", }, -- GitLab From 75b0f8473fed6941e0ae01da082ec8b3d58adb9b Mon Sep 17 00:00:00 2001 From: dongjian Date: Sun, 28 Apr 2019 16:54:17 +0800 Subject: [PATCH 1641/1861] cpufreq: centrino: Fix centrino_setpolicy() kerneldoc comment The code is using centrino_target() rather than centrino_setpolicy(). Signed-off-by: dongjian Acked-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/speedstep-centrino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c index a1fb735685dbf..e086b2dd4072d 100644 --- a/drivers/cpufreq/speedstep-centrino.c +++ b/drivers/cpufreq/speedstep-centrino.c @@ -412,7 +412,7 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) } /** - * centrino_setpolicy - set a new CPUFreq policy + * centrino_target - set a new CPUFreq policy + @policy: new policy * @policy: new policy * @index: index of target frequency * -- GitLab From 6339a3889ad4d0dd930ed7a1e873fb81d3e690f7 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 1 Apr 2019 12:13:42 +0200 Subject: [PATCH 1642/1861] s390/kexec_file: Fix potential segment overlap in ELF loader When loading an ELF image via kexec_file, the segment alignment is ignored in the calculation of the load address for the next segment. When there are multiple segments this can lead to segment overlap and thus load failure.
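To see how the overlap happens, take some illustrative numbers: each segment is mapped at an address rounded up to its p_align, but the accumulated size used to place the next piece was not rounded the same way. A minimal sketch of the arithmetic, assuming the kernel's ALIGN() semantics (round up to a power-of-two boundary; the values are made up for illustration):

	#include <stdio.h>

	#define ALIGN(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))

	int main(void)
	{
		unsigned long total = 0x1100;	/* bytes used after segment A */
		unsigned long b_align = 0x1000;	/* p_align of segment B */
		unsigned long b_memsz = 0x800;	/* p_memsz of segment B */

		/* buggy accumulation ignores B's alignment ... */
		unsigned long buggy = total + b_memsz;			/* 0x1900 */
		/* ... while B is really mapped from ALIGN(total, b_align) */
		unsigned long fixed = ALIGN(total, b_align) + b_memsz;	/* 0x2800 */

		/* anything later placed at the buggy offset 0x1900 can land
		 * inside B's real range [0x2000, 0x2800) */
		printf("buggy end %#lx, fixed end %#lx\n", buggy, fixed);
		return 0;
	}

The one-line fix in the patch below is exactly the "fixed" accumulation: align the running size to the incoming segment's p_align before adding its memsz.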
Signed-off-by: Philipp Rudo Fixes: 8be018827154 ("s390/kexec_file: Add ELF loader") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kexec_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 5a286b012043b..1d1c77c647d25 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -53,7 +53,7 @@ static int kexec_file_add_elf_kernel(struct kimage *image, if (ret) return ret; - data->memsz += buf.memsz; + data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz; } return 0; -- GitLab From 729829d775c9a5217abc784b2f16087d79c4eec8 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 1 Apr 2019 12:48:43 +0200 Subject: [PATCH 1643/1861] s390/kexec_file: Fix detection of text segment in ELF loader To register data for the next kernel (command line, oldmem_base, etc.) the current kernel needs to find the ELF segment that contains head.S. This is currently done by checking for 'phdr->p_paddr == 0'. This works fine for the current kernel build but in theory the first few pages could be skipped. Make the detection more robust by checking if the entry point lies within the segment. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kexec_elf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 1d1c77c647d25..5cf340b778f18 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -19,10 +19,15 @@ static int kexec_file_add_elf_kernel(struct kimage *image, struct kexec_buf buf; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; + Elf_Addr entry; int i, ret; ehdr = (Elf_Ehdr *)kernel; buf.image = image; + if (image->type == KEXEC_TYPE_CRASH) + entry = STARTUP_KDUMP_OFFSET; + else + entry = ehdr->e_entry; phdr = (void *)ehdr + ehdr->e_phoff; for (i = 0; i < ehdr->e_phnum; i++, phdr++) { @@ -35,7 +40,7 @@ static int kexec_file_add_elf_kernel(struct kimage *image, buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); buf.memsz = phdr->p_memsz; - if (phdr->p_paddr == 0) { + if (entry - phdr->p_paddr < phdr->p_memsz) { data->kernel_buf = buf.buffer; data->memsz += STARTUP_NORMAL_OFFSET; -- GitLab From 61f3f8fc223524692aaa87cf46d95ca3015e83a9 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 5 Mar 2019 17:18:13 +0100 Subject: [PATCH 1644/1861] s390/purgatory: Reduce purgatory size The purgatory is compiled into the vmlinux and kept in memory all the time during runtime. Thus any section not needed to load the purgatory unnecessarily bloats up its footprint in both file size and memory size. Reduce the purgatory size by stripping the unneeded sections from the purgatory. This reduces the purgatory's size by ~33%.
Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/purgatory/Makefile | 14 +++++--- arch/s390/purgatory/purgatory.lds.S | 54 +++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 arch/s390/purgatory/purgatory.lds.S diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index ce6a3f75065bf..175ff6c71735d 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := head.o purgatory.o string.o sha256.o mem.o -targets += $(purgatory-y) purgatory.ro kexec-purgatory.c +targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro kexec-purgatory.c PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE @@ -16,8 +16,6 @@ $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE $(call if_changed_rule,cc_o_c) -LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding @@ -25,9 +23,17 @@ KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) -$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE +LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T +$(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE $(call if_changed,ld) +OBJCOPYFLAGS_purgatory.ro := -O elf64-s390 +OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*' +OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment' +OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' +$(obj)/purgatory.ro: $(obj)/purgatory FORCE + $(call if_changed,objcopy) + quiet_cmd_bin2c = BIN2C $@ cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ diff --git a/arch/s390/purgatory/purgatory.lds.S b/arch/s390/purgatory/purgatory.lds.S new file mode 100644 index 0000000000000..482eb4fbcef19 --- /dev/null +++ b/arch/s390/purgatory/purgatory.lds.S @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) + +ENTRY(purgatory_start) + +SECTIONS +{ + . = 0; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + + . = ALIGN(256); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(8); /* For convenience during zeroing */ + _ebss = .; + } + _end = .; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.eh_frame) + *(*__ksymtab*) + *(___kcrctab*) + } +} -- GitLab From 4c0f032d496385fa8071e404a1bc33f4abbc2f81 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 5 Mar 2019 18:08:36 +0100 Subject: [PATCH 1645/1861] s390/purgatory: Omit use of bin2c Omit use of script/bin2c hack. Directly include into assembler file instead. 
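For reference, the C side of this scheme only needs the two symbols the new assembler file exports. A hedged sketch of how a consumer can reference the embedded blob (the exact in-kernel declarations may differ; the size type here is an assumption based on the .quad used in the assembler file):

	#include <string.h>

	/* provided by kexec-purgatory.S; types assumed for illustration */
	extern const char kexec_purgatory[];
	extern const unsigned long long kexec_purgatory_size;

	static void copy_purgatory(void *dst)
	{
		memcpy(dst, kexec_purgatory, kexec_purgatory_size);
	}

Compared with bin2c, .incbin avoids generating and compiling a large C array at build time; the object file simply carries the binary in .rodata.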
Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/purgatory/Makefile | 9 +++------ arch/s390/purgatory/kexec-purgatory.S | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 arch/s390/purgatory/kexec-purgatory.S diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 175ff6c71735d..b20934d919863 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := head.o purgatory.o string.o sha256.o mem.o -targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro kexec-purgatory.c +targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE @@ -34,10 +34,7 @@ OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' $(obj)/purgatory.ro: $(obj)/purgatory FORCE $(call if_changed,objcopy) -quiet_cmd_bin2c = BIN2C $@ - cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ - -$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE - $(call if_changed,bin2c) +$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE + $(call if_changed_rule,as_o_S) obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o diff --git a/arch/s390/purgatory/kexec-purgatory.S b/arch/s390/purgatory/kexec-purgatory.S new file mode 100644 index 0000000000000..8293753100aea --- /dev/null +++ b/arch/s390/purgatory/kexec-purgatory.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + + .section .rodata, "a" + + .align 8 +kexec_purgatory: + .globl kexec_purgatory + .incbin "arch/s390/purgatory/purgatory.ro" +.Lkexec_purgatroy_end: + + .align 8 +kexec_purgatory_size: + .globl kexec_purgatory_size + .quad .Lkexec_purgatroy_end - kexec_purgatory -- GitLab From d0d249d75dda1b101624316a52d117be07b8ccff Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 6 Mar 2019 17:36:26 +0100 Subject: [PATCH 1646/1861] s390/kexec_file: Simplify parmarea access Access the parmarea in head.S via a struct instead of individual offsets. While at it make the fields in the parmarea .quads. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/boot/head.S | 11 ++++++----- arch/s390/include/asm/kexec.h | 7 ++++--- arch/s390/include/asm/setup.h | 10 ++++++++++ arch/s390/kernel/kexec_elf.c | 1 + arch/s390/kernel/kexec_image.c | 1 + arch/s390/kernel/machine_kexec_file.c | 23 ++++++----------------- 6 files changed, 28 insertions(+), 25 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index ce2cbbc417428..d585c4dbdac7c 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -323,13 +323,14 @@ ENTRY(startup_kdump) # # params at 10400 (setup.h) +# Must be keept in sync with struct parmarea in setup.h # .org PARMAREA - .long 0,0 # IPL_DEVICE - .long 0,0 # INITRD_START - .long 0,0 # INITRD_SIZE - .long 0,0 # OLDMEM_BASE - .long 0,0 # OLDMEM_SIZE + .quad 0 # IPL_DEVICE + .quad 0 # INITRD_START + .quad 0 # INITRD_SIZE + .quad 0 # OLDMEM_BASE + .quad 0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 825dd0f7f2211..08dc2b74858d5 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -11,6 +11,7 @@ #include #include +#include /* * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. * I.e. 
Maximum page that is mapped directly into kernel memory, @@ -51,11 +52,11 @@ struct s390_load_data { /* Pointer to the kernel buffer. Used to register cmdline etc.. */ void *kernel_buf; + /* Parmarea in the kernel buffer. */ + struct parmarea *parm; + /* Total size of loaded segments in memory. Used as an offset. */ size_t memsz; - - /* Load address of initrd. Used to register INITRD_START in kernel. */ - unsigned long initrd_load_addr; }; int kexec_file_add_purgatory(struct kimage *image, diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index efda97804aa4a..b603cc09c895c 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -65,6 +65,16 @@ #define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET)) #define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET)) +struct parmarea { + unsigned long ipl_device; /* 0x10400 */ + unsigned long initrd_start; /* 0x10408 */ + unsigned long initrd_size; /* 0x10410 */ + unsigned long oldmem_base; /* 0x10418 */ + unsigned long oldmem_size; /* 0x10420 */ + char pad1[0x10480 - 0x10428]; /* 0x10428 - 0x10480 */ + char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */ +}; + extern int noexec_disabled; extern int memory_end_set; extern unsigned long memory_end; diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 5cf340b778f18..1cdf90767cba5 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -42,6 +42,7 @@ static int kexec_file_add_elf_kernel(struct kimage *image, if (entry - phdr->p_paddr < phdr->p_memsz) { data->kernel_buf = buf.buffer; + data->parm = buf.buffer + PARMAREA; data->memsz += STARTUP_NORMAL_OFFSET; buf.buffer += STARTUP_NORMAL_OFFSET; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index 3800852595e8a..d9025adc2bbbc 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -32,6 +32,7 @@ static int kexec_file_add_image_kernel(struct kimage *image, ret = kexec_add_buffer(&buf); data->kernel_buf = kernel; + data->parm = (void *)kernel + PARMAREA; data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET; return ret; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 32023b4f9dc00..8a85ecd8428cd 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -25,24 +25,12 @@ int *kexec_file_update_kernel(struct kimage *image, if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) return ERR_PTR(-EINVAL); - if (image->cmdline_buf_len) - memcpy(data->kernel_buf + COMMAND_LINE_OFFSET, - image->cmdline_buf, image->cmdline_buf_len); + memcpy(data->parm->command_line, image->cmdline_buf, + image->cmdline_buf_len); if (image->type == KEXEC_TYPE_CRASH) { - loc = (unsigned long *)(data->kernel_buf + OLDMEM_BASE_OFFSET); - *loc = crashk_res.start; - - loc = (unsigned long *)(data->kernel_buf + OLDMEM_SIZE_OFFSET); - *loc = crashk_res.end - crashk_res.start + 1; - } - - if (image->initrd_buf) { - loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET); - *loc = data->initrd_load_addr; - - loc = (unsigned long *)(data->kernel_buf + INITRD_SIZE_OFFSET); - *loc = image->initrd_buf_len; + data->parm->oldmem_base = crashk_res.start; + data->parm->oldmem_size = crashk_res.end - crashk_res.start + 1; } return NULL; @@ -127,7 +115,8 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, buf.mem += crashk_res.start; buf.memsz = buf.bufsz; - data->initrd_load_addr = buf.mem; + data->parm->initrd_start = 
buf.mem; + data->parm->initrd_size = buf.memsz; data->memsz += buf.memsz; ret = kexec_add_buffer(&buf); -- GitLab From 8e4964261374aaec9f4a83de076ceb11c8cdc044 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Thu, 7 Mar 2019 12:48:03 +0100 Subject: [PATCH 1647/1861] s390/kexec_file: Unify loader code s390_image_load and s390_elf_load have the same code to load the different components. Combine this functionality in one shared function. While at it move kexec_file_update_kernel into the new function as well. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 10 ++--- arch/s390/kernel/kexec_elf.c | 31 +++---------- arch/s390/kernel/kexec_image.c | 37 ++++------------ arch/s390/kernel/machine_kexec_file.c | 64 +++++++++++++++++---------- 4 files changed, 58 insertions(+), 84 deletions(-) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 08dc2b74858d5..a38a57ec6d8f1 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -59,13 +59,9 @@ struct s390_load_data { size_t memsz; }; -int kexec_file_add_purgatory(struct kimage *image, - struct s390_load_data *data); -int kexec_file_add_initrd(struct kimage *image, - struct s390_load_data *data, - char *initrd, unsigned long initrd_len); -int *kexec_file_update_kernel(struct kimage *iamge, - struct s390_load_data *data); +void *kexec_file_add_components(struct kimage *image, + int (*add_kernel)(struct kimage *image, + struct s390_load_data *data)); extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 1cdf90767cba5..c74ff6b54344f 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -12,16 +12,17 @@ #include #include -static int kexec_file_add_elf_kernel(struct kimage *image, - struct s390_load_data *data, - char *kernel, unsigned long kernel_len) +static int kexec_file_add_kernel_elf(struct kimage *image, + struct s390_load_data *data) { struct kexec_buf buf; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; Elf_Addr entry; + void *kernel; int i, ret; + kernel = image->kernel_buf; ehdr = (Elf_Ehdr *)kernel; buf.image = image; if (image->type == KEXEC_TYPE_CRASH) @@ -62,7 +63,7 @@ static int kexec_file_add_elf_kernel(struct kimage *image, data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz; } - return 0; + return data->memsz ? 0 : -EINVAL; } static void *s390_elf_load(struct kimage *image, @@ -70,11 +71,10 @@ static void *s390_elf_load(struct kimage *image, char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len) { - struct s390_load_data data = {0}; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; size_t size; - int i, ret; + int i; /* image->fobs->probe already checked for valid ELF magic number. 
*/ ehdr = (Elf_Ehdr *)kernel; @@ -107,24 +107,7 @@ static void *s390_elf_load(struct kimage *image, if (size > kernel_len) return ERR_PTR(-EINVAL); - ret = kexec_file_add_elf_kernel(image, &data, kernel, kernel_len); - if (ret) - return ERR_PTR(ret); - - if (!data.memsz) - return ERR_PTR(-EINVAL); - - if (initrd) { - ret = kexec_file_add_initrd(image, &data, initrd, initrd_len); - if (ret) - return ERR_PTR(ret); - } - - ret = kexec_file_add_purgatory(image, &data); - if (ret) - return ERR_PTR(ret); - - return kexec_file_update_kernel(image, &data); + return kexec_file_add_components(image, kexec_file_add_kernel_elf); } static int s390_elf_probe(const char *buf, unsigned long len) diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index d9025adc2bbbc..d7e65eeae22f9 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -12,30 +12,26 @@ #include #include -static int kexec_file_add_image_kernel(struct kimage *image, - struct s390_load_data *data, - char *kernel, unsigned long kernel_len) +static int kexec_file_add_kernel_image(struct kimage *image, + struct s390_load_data *data) { struct kexec_buf buf; - int ret; buf.image = image; - buf.buffer = kernel + STARTUP_NORMAL_OFFSET; - buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET; + buf.buffer = image->kernel_buf + STARTUP_NORMAL_OFFSET; + buf.bufsz = image->kernel_buf_len - STARTUP_NORMAL_OFFSET; buf.mem = STARTUP_NORMAL_OFFSET; if (image->type == KEXEC_TYPE_CRASH) buf.mem += crashk_res.start; buf.memsz = buf.bufsz; - ret = kexec_add_buffer(&buf); - - data->kernel_buf = kernel; - data->parm = (void *)kernel + PARMAREA; + data->kernel_buf = image->kernel_buf; + data->parm = image->kernel_buf + PARMAREA; data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET; - return ret; + return kexec_add_buffer(&buf); } static void *s390_image_load(struct kimage *image, @@ -43,24 +39,7 @@ static void *s390_image_load(struct kimage *image, char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len) { - struct s390_load_data data = {0}; - int ret; - - ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len); - if (ret) - return ERR_PTR(ret); - - if (initrd) { - ret = kexec_file_add_initrd(image, &data, initrd, initrd_len); - if (ret) - return ERR_PTR(ret); - } - - ret = kexec_file_add_purgatory(image, &data); - if (ret) - return ERR_PTR(ret); - - return kexec_file_update_kernel(image, &data); + return kexec_file_add_components(image, kexec_file_add_kernel_image); } static int s390_image_probe(const char *buf, unsigned long len) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 8a85ecd8428cd..08409d61aecaa 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -17,25 +17,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { NULL, }; -int *kexec_file_update_kernel(struct kimage *image, - struct s390_load_data *data) -{ - unsigned long *loc; - - if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) - return ERR_PTR(-EINVAL); - - memcpy(data->parm->command_line, image->cmdline_buf, - image->cmdline_buf_len); - - if (image->type == KEXEC_TYPE_CRASH) { - data->parm->oldmem_base = crashk_res.start; - data->parm->oldmem_size = crashk_res.end - crashk_res.start + 1; - } - - return NULL; -} - static int kexec_file_update_purgatory(struct kimage *image) { u64 entry, type; @@ -78,7 +59,8 @@ static int kexec_file_update_purgatory(struct kimage *image) return ret; } -int 
kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) +static int kexec_file_add_purgatory(struct kimage *image, + struct s390_load_data *data) { struct kexec_buf buf; int ret; @@ -98,16 +80,16 @@ int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) return ret; } -int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, - char *initrd, unsigned long initrd_len) +static int kexec_file_add_initrd(struct kimage *image, + struct s390_load_data *data) { struct kexec_buf buf; int ret; buf.image = image; - buf.buffer = initrd; - buf.bufsz = initrd_len; + buf.buffer = image->initrd_buf; + buf.bufsz = image->initrd_buf_len; data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; @@ -123,6 +105,40 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, return ret; } +void *kexec_file_add_components(struct kimage *image, + int (*add_kernel)(struct kimage *image, + struct s390_load_data *data)) +{ + struct s390_load_data data = {0}; + int ret; + + ret = add_kernel(image, &data); + if (ret) + return ERR_PTR(ret); + + if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) + return ERR_PTR(-EINVAL); + memcpy(data.parm->command_line, image->cmdline_buf, + image->cmdline_buf_len); + + if (image->type == KEXEC_TYPE_CRASH) { + data.parm->oldmem_base = crashk_res.start; + data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1; + } + + if (image->initrd_buf) { + ret = kexec_file_add_initrd(image, &data); + if (ret) + return ERR_PTR(ret); + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + return ERR_PTR(ret); + + return NULL; +} + int arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, const Elf_Shdr *relsec, -- GitLab From 653beba24d4cd281b078eab48c9bce956939061c Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Thu, 7 Mar 2019 15:56:34 +0100 Subject: [PATCH 1648/1861] s390/kexec_file: Load new kernel to absolute 0 The leading 64 kB of a kernel image don't contain any data needed to boot the new kernel when it is loaded via kexec_file. Thus kexec_file currently strips them off before loading the image. Keep the leading 64 kB in order to be able to pass an ipl_report to the next kernel. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 6 ++++++ arch/s390/kernel/kexec_elf.c | 16 ++++------------ arch/s390/kernel/kexec_image.c | 9 +++++---- arch/s390/kernel/machine_kexec_file.c | 12 ++++++++++-- arch/s390/kernel/relocate_kernel.S | 3 +++ 5 files changed, 28 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index a38a57ec6d8f1..9ec077b0fb4d8 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -43,6 +43,9 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 +/* Allow kexec_file to load a segment to 0 */ +#define KEXEC_BUF_MEM_UNKNOWN -1 + /* Provide a dummy definition to avoid build failures. */ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { } @@ -52,6 +55,9 @@ struct s390_load_data { /* Pointer to the kernel buffer. Used to register cmdline etc.. */ void *kernel_buf; + /* Load address of the kernel_buf. */ + unsigned long kernel_mem; + /* Parmarea in the kernel buffer. 
*/ struct parmarea *parm; diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index c74ff6b54344f..42bcd93f4318b 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -39,28 +39,20 @@ static int kexec_file_add_kernel_elf(struct kimage *image, buf.bufsz = phdr->p_filesz; buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; buf.memsz = phdr->p_memsz; + data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz; if (entry - phdr->p_paddr < phdr->p_memsz) { data->kernel_buf = buf.buffer; + data->kernel_mem = buf.mem; data->parm = buf.buffer + PARMAREA; - data->memsz += STARTUP_NORMAL_OFFSET; - - buf.buffer += STARTUP_NORMAL_OFFSET; - buf.bufsz -= STARTUP_NORMAL_OFFSET; - - buf.mem += STARTUP_NORMAL_OFFSET; - buf.memsz -= STARTUP_NORMAL_OFFSET; } - if (image->type == KEXEC_TYPE_CRASH) - buf.mem += crashk_res.start; - ret = kexec_add_buffer(&buf); if (ret) return ret; - - data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz; } return data->memsz ? 0 : -EINVAL; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index d7e65eeae22f9..7281540605b70 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -19,17 +19,18 @@ static int kexec_file_add_kernel_image(struct kimage *image, buf.image = image; - buf.buffer = image->kernel_buf + STARTUP_NORMAL_OFFSET; - buf.bufsz = image->kernel_buf_len - STARTUP_NORMAL_OFFSET; + buf.buffer = image->kernel_buf; + buf.bufsz = image->kernel_buf_len; - buf.mem = STARTUP_NORMAL_OFFSET; + buf.mem = 0; if (image->type == KEXEC_TYPE_CRASH) buf.mem += crashk_res.start; buf.memsz = buf.bufsz; data->kernel_buf = image->kernel_buf; + data->kernel_mem = buf.mem; data->parm = image->kernel_buf + PARMAREA; - data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET; + data->memsz += buf.memsz; return kexec_add_buffer(&buf); } diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 08409d61aecaa..0e2a5a7a1b7c4 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -17,7 +17,8 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { NULL, }; -static int kexec_file_update_purgatory(struct kimage *image) +static int kexec_file_update_purgatory(struct kimage *image, + struct s390_load_data *data) { u64 entry, type; int ret; @@ -76,7 +77,7 @@ static int kexec_file_add_purgatory(struct kimage *image, if (ret) return ret; - ret = kexec_file_update_purgatory(image); + ret = kexec_file_update_purgatory(image, data); return ret; } @@ -136,6 +137,13 @@ void *kexec_file_add_components(struct kimage *image, if (ret) return ERR_PTR(ret); + if (data.kernel_mem == 0) { + unsigned long restart_psw = 0x0008000080000000UL; + restart_psw += image->start; + memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw)); + image->start = 0; + } + return NULL; } diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index c97c2d40fe15a..1b56f087ce2ce 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -58,10 +58,13 @@ ENTRY(relocate_kernel) j .base .done: sgr %r0,%r0 # clear register r0 + cghi %r3,0 + je .diag la %r4,load_psw-.base(%r13) # load psw-address into the register o %r3,4(%r4) # or load address into psw st %r3,4(%r4) mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 + .diag: diag %r0,%r0,0x308 .align 8 -- GitLab From e23a8020ce4e094e10d717d39a8ce799243bf8c1 Mon Sep 17 
00:00:00 2001 From: Philipp Rudo Date: Tue, 26 Feb 2019 10:50:39 +0100 Subject: [PATCH 1649/1861] s390/kexec_file: Signature verification prototype Add kernel signature verification to kexec_file. The verification is based on module signature verification and works with kernel images signed via scripts/sign-file. Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 11 ++++ arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/performance_defconfig | 1 + arch/s390/defconfig | 1 + arch/s390/include/asm/kexec.h | 1 + arch/s390/kernel/kexec_elf.c | 3 + arch/s390/kernel/kexec_image.c | 3 + arch/s390/kernel/machine_kexec_file.c | 74 +++++++++++++++++++++++++ 8 files changed, 95 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c3fcf19c3af5..21e851b0a989b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -553,6 +553,17 @@ config ARCH_HAS_KEXEC_PURGATORY def_bool y depends on KEXEC_FILE +config KEXEC_VERIFY_SIG + bool "Verify kernel signature during kexec_file_load() syscall" + depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION + help + This option makes kernel signature verification mandatory for + the kexec_file_load() syscall. + + In addition to that option, you need to enable signature + verification for the corresponding kernel image type being + loaded in order for this to work. + config ARCH_RANDOM def_bool y prompt "s390 architectural random number generation API" diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 9824c7bad9d48..b0920b35f87b8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -64,6 +64,7 @@ CONFIG_NUMA=y CONFIG_PREEMPT=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_VERIFY_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 4fcbe5792744b..09aa5cb148737 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -65,6 +65,7 @@ CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_VERIFY_SIG=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 4d58a92b5d979..c59b922cb6c56 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -39,6 +39,7 @@ CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_VERIFY_SIG=y CONFIG_CRASH_DUMP=y CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 9ec077b0fb4d8..59026899e7669 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -65,6 +65,7 @@ struct s390_load_data { size_t memsz; }; +int s390_verify_sig(const char *kernel, unsigned long kernel_len); void *kexec_file_add_components(struct kimage *image, int (*add_kernel)(struct kimage *image, struct s390_load_data *data)); diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 42bcd93f4318b..01e45d89b636e 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -125,4 +125,7 @@ static int s390_elf_probe(const char *buf, unsigned long len) const struct kexec_file_ops s390_kexec_elf_ops = { .probe = s390_elf_probe, .load = s390_elf_load, +#ifdef CONFIG_KEXEC_VERIFY_SIG + .verify_sig = s390_verify_sig, +#endif /* CONFIG_KEXEC_VERIFY_SIG */ }; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c 
index 7281540605b70..c378bbac5b35a 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -54,4 +54,7 @@ static int s390_image_probe(const char *buf, unsigned long len) const struct kexec_file_ops s390_kexec_image_ops = { .probe = s390_image_probe, .load = s390_image_load, +#ifdef CONFIG_KEXEC_VERIFY_SIG + .verify_sig = s390_verify_sig, +#endif /* CONFIG_KEXEC_VERIFY_SIG */ }; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 0e2a5a7a1b7c4..e0f6e618e1bfc 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -8,7 +8,11 @@ */ #include +#include #include +#include +#include +#include #include const struct kexec_file_ops * const kexec_file_loaders[] = { @@ -17,6 +21,76 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { NULL, }; +#ifdef CONFIG_KEXEC_VERIFY_SIG +/* + * Module signature information block. + * + * The constituents of the signature section are, in order: + * + * - Signer's name + * - Key identifier + * - Signature data + * - Information block + */ +struct module_signature { + u8 algo; /* Public-key crypto algorithm [0] */ + u8 hash; /* Digest algorithm [0] */ + u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */ + u8 signer_len; /* Length of signer's name [0] */ + u8 key_id_len; /* Length of key identifier [0] */ + u8 __pad[3]; + __be32 sig_len; /* Length of signature data */ +}; + +#define PKEY_ID_PKCS7 2 + +int s390_verify_sig(const char *kernel, unsigned long kernel_len) +{ + const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1; + struct module_signature *ms; + unsigned long sig_len; + + /* Skip signature verification when not secure IPLed. */ + if (!ipl_secure_flag) + return 0; + + if (marker_len > kernel_len) + return -EKEYREJECTED; + + if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING, + marker_len)) + return -EKEYREJECTED; + kernel_len -= marker_len; + + ms = (void *)kernel + kernel_len - sizeof(*ms); + kernel_len -= sizeof(*ms); + + sig_len = be32_to_cpu(ms->sig_len); + if (sig_len >= kernel_len) + return -EKEYREJECTED; + kernel_len -= sig_len; + + if (ms->id_type != PKEY_ID_PKCS7) + return -EKEYREJECTED; + + if (ms->algo != 0 || + ms->hash != 0 || + ms->signer_len != 0 || + ms->key_id_len != 0 || + ms->__pad[0] != 0 || + ms->__pad[1] != 0 || + ms->__pad[2] != 0) { + return -EBADMSG; + } + + return verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); +} +#endif /* CONFIG_KEXEC_VERIFY_SIG */ + static int kexec_file_update_purgatory(struct kimage *image, struct s390_load_data *data) { -- GitLab From 99feaa717e558cf4f2ad0faf53acac3cf9cc7438 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 18 Mar 2019 12:53:47 +0100 Subject: [PATCH 1650/1861] s390/kexec_file: Create ipl report and pass to next kernel Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ipl.h | 2 + arch/s390/include/asm/kexec.h | 2 + arch/s390/kernel/kexec_elf.c | 5 ++ arch/s390/kernel/kexec_image.c | 5 ++ arch/s390/kernel/machine_kexec_file.c | 81 ++++++++++++++++++++++++--- 5 files changed, 86 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index bf62af49af063..420d39ebdc46b 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -36,6 +36,8 @@ struct ipl_parameter_block { #define IPL_MAX_SUPPORTED_VERSION (0) +#define IPL_RB_CERT_UNKNOWN 
((unsigned short)-1) + #define DIAG308_VMPARM_SIZE (64) #define DIAG308_SCPDATA_OFFSET offsetof(struct ipl_parameter_block, \ fcp.scp_data) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 59026899e7669..305d3465574f1 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -63,6 +63,8 @@ struct s390_load_data { /* Total size of loaded segments in memory. Used as an offset. */ size_t memsz; + + struct ipl_report *report; }; int s390_verify_sig(const char *kernel, unsigned long kernel_len); diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 01e45d89b636e..6d0635ceddd04 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -10,6 +10,7 @@ #include #include #include +#include #include static int kexec_file_add_kernel_elf(struct kimage *image, @@ -50,6 +51,10 @@ static int kexec_file_add_kernel_elf(struct kimage *image, data->parm = buf.buffer + PARMAREA; } + ipl_report_add_component(data->report, &buf, + IPL_RB_COMPONENT_FLAG_SIGNED | + IPL_RB_COMPONENT_FLAG_VERIFIED, + IPL_RB_CERT_UNKNOWN); ret = kexec_add_buffer(&buf); if (ret) return ret; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index c378bbac5b35a..58318bf89fd90 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -10,6 +10,7 @@ #include #include #include +#include #include static int kexec_file_add_kernel_image(struct kimage *image, @@ -32,6 +33,10 @@ static int kexec_file_add_kernel_image(struct kimage *image, data->parm = image->kernel_buf + PARMAREA; data->memsz += buf.memsz; + ipl_report_add_component(data->report, &buf, + IPL_RB_COMPONENT_FLAG_SIGNED | + IPL_RB_COMPONENT_FLAG_VERIFIED, + IPL_RB_CERT_UNKNOWN); return kexec_add_buffer(&buf); } diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index e0f6e618e1bfc..48cab9600ed91 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -150,9 +151,9 @@ static int kexec_file_add_purgatory(struct kimage *image, ret = kexec_load_purgatory(image, &buf); if (ret) return ret; + data->memsz += buf.memsz; - ret = kexec_file_update_purgatory(image, data); - return ret; + return kexec_file_update_purgatory(image, data); } static int kexec_file_add_initrd(struct kimage *image, @@ -177,7 +178,60 @@ static int kexec_file_add_initrd(struct kimage *image, data->memsz += buf.memsz; ret = kexec_add_buffer(&buf); - return ret; + if (ret) + return ret; + + return ipl_report_add_component(data->report, &buf, 0, 0); +} + +static int kexec_file_add_ipl_report(struct kimage *image, + struct s390_load_data *data) +{ + __u32 *lc_ipl_parmblock_ptr; + unsigned int len, ncerts; + struct kexec_buf buf; + unsigned long addr; + void *ptr, *end; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + + ptr = (void *)ipl_cert_list_addr; + end = ptr + ipl_cert_list_size; + ncerts = 0; + while (ptr < end) { + ncerts++; + len = *(unsigned int *)ptr; + ptr += sizeof(len); + ptr += len; + } + + addr = data->memsz + data->report->size; + addr += ncerts * sizeof(struct ipl_rb_certificate_entry); + ptr = (void *)ipl_cert_list_addr; + while (ptr < end) { + len = *(unsigned int *)ptr; + ptr += sizeof(len); + ipl_report_add_certificate(data->report, ptr, addr, len); + addr += len; + ptr += len; 
+ } + + buf.buffer = ipl_report_finish(data->report); + buf.bufsz = data->report->size; + buf.memsz = buf.bufsz; + + data->memsz += buf.memsz; + + lc_ipl_parmblock_ptr = + data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); + *lc_ipl_parmblock_ptr = (__u32)buf.mem; + + return kexec_add_buffer(&buf); } void *kexec_file_add_components(struct kimage *image, @@ -187,12 +241,18 @@ void *kexec_file_add_components(struct kimage *image, struct s390_load_data data = {0}; int ret; + data.report = ipl_report_init(&ipl_block); + if (IS_ERR(data.report)) + return data.report; + ret = add_kernel(image, &data); if (ret) - return ERR_PTR(ret); + goto out; - if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) - return ERR_PTR(-EINVAL); + if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) { + ret = -EINVAL; + goto out; + } memcpy(data.parm->command_line, image->cmdline_buf, image->cmdline_buf_len); @@ -204,12 +264,12 @@ void *kexec_file_add_components(struct kimage *image, if (image->initrd_buf) { ret = kexec_file_add_initrd(image, &data); if (ret) - return ERR_PTR(ret); + goto out; } ret = kexec_file_add_purgatory(image, &data); if (ret) - return ERR_PTR(ret); + goto out; if (data.kernel_mem == 0) { unsigned long restart_psw = 0x0008000080000000UL; @@ -218,7 +278,10 @@ void *kexec_file_add_components(struct kimage *image, image->start = 0; } - return NULL; + ret = kexec_file_add_ipl_report(image, &data); +out: + ipl_report_free(data.report); + return ERR_PTR(ret); } int arch_kexec_apply_relocations_add(struct purgatory_info *pi, -- GitLab From 268a78404973594d1a7ec3a2b6a2474e0543a435 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 26 Mar 2019 15:45:53 +0100 Subject: [PATCH 1651/1861] s390/kexec_file: Disable kexec_load when IPLed secure A kernel loaded via kexec_load cannot be verified. Thus disable the kexec_load system call in kernels which were IPLed securely. Use the IMA mechanism to do so. 
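Conceptually the gate reduces to a single check on the syscall path; a sketch of the idea only (the real logic lives in IMA's hooks, identifiers assumed):

	/* Sketch: refuse the unverifiable syscall under secure IPL. */
	static int kexec_load_allowed_sketch(enum kernel_load_data_id id)
	{
		if (id == LOADING_KEXEC_IMAGE && arch_ima_get_secureboot())
			return -EACCES;	/* kexec_load passes no file to verify */
		return 0;
	}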
Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 ++ arch/s390/kernel/ima_arch.c | 14 ++++++++++++++ include/linux/ima.h | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/ima_arch.c diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1222db6d4ee90..d28acd7ba81e9 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -77,6 +77,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o +obj-$(CONFIG_IMA) += ima_arch.o + obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c new file mode 100644 index 0000000000000..f3c3e6e1c5d38 --- /dev/null +++ b/arch/s390/kernel/ima_arch.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +bool arch_ima_get_secureboot(void) +{ + return ipl_secure_flag; +} + +const char * const *arch_get_ima_policy(void) +{ + return NULL; +} diff --git a/include/linux/ima.h b/include/linux/ima.h index dc12fbcf484ce..fd9f7cf4cdf51 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -31,7 +31,7 @@ extern void ima_post_path_mknod(struct dentry *dentry); extern void ima_add_kexec_buffer(struct kimage *image); #endif -#if defined(CONFIG_X86) && defined(CONFIG_EFI) +#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); #else -- GitLab From c9896acc7851109d4e84c1e3a54cb1b9794dea6b Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Mon, 8 Apr 2019 14:24:08 +0200 Subject: [PATCH 1652/1861] s390/ipl: Provide has_secure sysfs attribute Provide an interface for userspace so it can find out if a machine is capable of doing secure boot. The interface is, for example, needed for zipl so it can find out which file format it can/should write to disk. 
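From userspace the attribute is a one-character read; a minimal sketch of what a tool like zipl could do, with the sysfs path assumed from the ipl kset:

	#include <stdio.h>

	/* Returns 1 when the machine reports secure boot support, else 0. */
	static int machine_has_secure_boot(void)
	{
		FILE *f = fopen("/sys/firmware/ipl/has_secure", "r");
		int c;

		if (!f)
			return 0;	/* attribute absent: assume no support */
		c = fgetc(f);
		fclose(f);
		return c == '1';
	}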
Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sclp.h | 2 ++ arch/s390/kernel/ipl.c | 17 +++++++++++++++++ drivers/s390/char/sclp.h | 4 +++- drivers/s390/char/sclp_early.c | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ef4c9dec06a47..ef5d8fa921226 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -79,6 +79,8 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; + unsigned char has_sipl : 1; + unsigned char has_sipl_g2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 6f2bb64cf70ea..e123c0df83f1c 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -283,6 +283,20 @@ static ssize_t ipl_secure_show(struct kobject *kobj, static struct kobj_attribute sys_ipl_secure_attr = __ATTR(secure, 0444, ipl_secure_show, NULL); +static ssize_t ipl_has_secure_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + if (MACHINE_IS_LPAR) + return sprintf(page, "%i\n", !!sclp.has_sipl); + else if (MACHINE_IS_VM) + return sprintf(page, "%i\n", !!sclp.has_sipl_g2); + else + return sprintf(page, "%i\n", 0); +} + +static struct kobj_attribute sys_ipl_has_secure_attr = + __ATTR(has_secure, 0444, ipl_has_secure_show, NULL); + static ssize_t ipl_vm_parm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -379,6 +393,7 @@ static struct attribute *ipl_fcp_attrs[] = { &sys_ipl_fcp_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, NULL, }; @@ -395,6 +410,7 @@ static struct attribute *ipl_ccw_attrs_vm[] = { &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_vm_parm_attr.attr, &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, NULL, }; @@ -403,6 +419,7 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, NULL, }; diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 367e9d384d852..287382dc21c58 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -197,7 +197,9 @@ struct read_info_sccb { u32 hmfai; /* 124-127 */ u8 _pad_128[134 - 128]; /* 128-133 */ u8 byte_134; /* 134 */ - u8 _pad_135[4096 - 135]; /* 135-4095 */ + u8 _pad_135; /* 135 */ + u16 cbl; /* 136-137 */ + u8 _pad_138[4096 - 138]; /* 138-4095 */ } __packed __aligned(PAGE_SIZE); struct read_storage_sccb { diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 8332788681c40..dae9de3d7c962 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -40,6 +40,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp.has_gisaf = !!(sccb->fac118 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); sclp.has_kss = !!(sccb->fac98 & 0x01); + sclp.has_sipl = !!(sccb->cbl & 0x02); + sclp.has_sipl_g2 = !!(sccb->cbl & 0x04); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) -- GitLab From 6324b4de6dca40361399d3e9a2f2f1cbe8e7e11e Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 12 Feb 2019 16:23:13 +0100 Subject: [PATCH 1653/1861] s390/pci: mark command line parser data __initdata No point to keep that around. 
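As a generic reminder of what the annotation buys (example code, not from the patch): __initdata places an object in .init.data, which is freed together with the other init sections once boot completes, so it must only be referenced from code that runs during init.

	#include <linux/init.h>

	static unsigned int example_flag __initdata = 1; /* freed after boot */

	static int __init example_setup(char *str)
	{
		example_flag = 0; /* fine: setup parsers run before initmem is freed */
		return 1;
	}
	__setup("example=", example_setup);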
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index dc9bc82c072c6..3262d7ea66d01 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -958,7 +958,7 @@ static void zpci_mem_exit(void) kmem_cache_destroy(zdev_fmb_cache); } -static unsigned int s390_pci_probe = 1; +static unsigned int s390_pci_probe __initdata = 1; static unsigned int s390_pci_initialized; char * __init pcibios_setup(char *str) -- GitLab From 066ee72aecdcb8cb2fd5968c852f304d7b88e2be Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 12 Feb 2019 18:41:49 +0100 Subject: [PATCH 1654/1861] s390/pci: remove unused define No users of pr_debug in that file. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 3262d7ea66d01..d88cee13425af 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -37,8 +37,6 @@ #include #include -#define DEBUG /* enable pr_debug */ - #define SIC_IRQ_MODE_ALL 0 #define SIC_IRQ_MODE_SINGLE 1 -- GitLab From c840927cf5f24d080236775e4c3a934e778069f5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 12 Feb 2019 18:39:46 +0100 Subject: [PATCH 1655/1861] s390/pci: move everything irq related to pci_irq.c Move everything interrupt related from pci.c to pci_irq.c. No functional change. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 6 + arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci.c | 216 ----------------------------------- arch/s390/pci/pci_irq.c | 222 ++++++++++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+), 217 deletions(-) create mode 100644 arch/s390/pci/pci_irq.c diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 4e0efebc56a95..71517451750b4 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -26,6 +26,9 @@ int pci_proc_domain(struct pci_bus *); #define ZPCI_BUS_NR 0 /* default bus number */ #define ZPCI_DEVFN 0 /* default device number */ +#define ZPCI_NR_DMA_SPACES 1 +#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS + /* PCI Function Controls */ #define ZPCI_FC_FN_ENABLED 0x80 #define ZPCI_FC_ERROR 0x40 @@ -219,6 +222,9 @@ struct zpci_dev *get_zdev_by_fid(u32); int zpci_dma_init(void); void zpci_dma_exit(void); +int __init zpci_irq_init(void); +void __init zpci_irq_exit(void); + /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 22d0871291eef..748626a330282 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -3,5 +3,5 @@ # Makefile for the s390 PCI subsystem. 
# -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index d88cee13425af..716e773af7880 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -24,11 +24,8 @@ #include #include #include -#include -#include #include #include -#include #include #include @@ -37,28 +34,13 @@ #include #include -#define SIC_IRQ_MODE_ALL 0 -#define SIC_IRQ_MODE_SINGLE 1 - -#define ZPCI_NR_DMA_SPACES 1 -#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS - /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static struct irq_chip zpci_irq_chip = { - .name = "zPCI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, -}; - static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); static DEFINE_SPINLOCK(zpci_domain_lock); -static struct airq_iv *zpci_aisb_iv; -static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; - #define ZPCI_IOMAP_ENTRIES \ min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \ ZPCI_IOMAP_MAX_ENTRIES) @@ -121,39 +103,6 @@ int pci_proc_domain(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pci_proc_domain); -/* Modify PCI: Register adapter interruptions */ -static int zpci_set_airq(struct zpci_dev *zdev) -{ - u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); - struct zpci_fib fib = {0}; - u8 status; - - fib.isc = PCI_ISC; - fib.sum = 1; /* enable summary notifications */ - fib.noi = airq_iv_end(zdev->aibv); - fib.aibv = (unsigned long) zdev->aibv->vector; - fib.aibvo = 0; /* each zdev has its own interrupt vector */ - fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; - fib.aisbo = zdev->aisb & 63; - - return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; -} - -/* Modify PCI: Unregister adapter interruptions */ -static int zpci_clear_airq(struct zpci_dev *zdev) -{ - u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); - struct zpci_fib fib = {0}; - u8 cc, status; - - cc = zpci_mod_fc(req, &fib, &status); - if (cc == 3 || (cc == 1 && status == 24)) - /* Function already gone or IRQs already deregistered. */ - cc = 0; - - return cc ? -EIO : 0; -} - /* Modify PCI: Register I/O address translation parameters */ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, u64 base, u64 limit, u64 iota) @@ -352,136 +301,6 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -static void zpci_irq_handler(struct airq_struct *airq) -{ - unsigned long si, ai; - struct airq_iv *aibv; - int irqs_on = 0; - - inc_irq_stat(IRQIO_PCI); - for (si = 0;;) { - /* Scan adapter summary indicator bit vector */ - si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); - if (si == -1UL) { - if (irqs_on++) - /* End of second scan with interrupts on. */ - break; - /* First scan complete, reenable interrupts. */ - if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC)) - break; - si = 0; - continue; - } - - /* Scan the adapter interrupt vector for this device. 
*/ - aibv = zpci_aibv[si]; - for (ai = 0;;) { - ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); - if (ai == -1UL) - break; - inc_irq_stat(IRQIO_MSI); - airq_iv_lock(aibv, ai); - generic_handle_irq(airq_iv_get_data(aibv, ai)); - airq_iv_unlock(aibv, ai); - } - } -} - -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs; - unsigned long aisb; - struct msi_desc *msi; - struct msi_msg msg; - int rc, irq; - - zdev->aisb = -1UL; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); - - /* Allocate adapter summary indicator bit */ - aisb = airq_iv_alloc_bit(zpci_aisb_iv); - if (aisb == -1UL) - return -EIO; - zdev->aisb = aisb; - - /* Create adapter interrupt vector */ - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); - if (!zdev->aibv) - return -ENOMEM; - - /* Wire up shortcut pointer */ - zpci_aibv[aisb] = zdev->aibv; - - /* Request MSI interrupts */ - hwirq = 0; - for_each_pci_msi_entry(msi, pdev) { - if (hwirq >= msi_vecs) - break; - irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ - if (irq < 0) - return -ENOMEM; - rc = irq_set_msi_desc(irq, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq, &zpci_irq_chip, - handle_simple_irq); - msg.data = hwirq; - msg.address_lo = zdev->msi_addr & 0xffffffff; - msg.address_hi = zdev->msi_addr >> 32; - pci_write_msi_msg(irq, &msg); - airq_iv_set_data(zdev->aibv, hwirq, irq); - hwirq++; - } - - /* Enable adapter interrupts */ - rc = zpci_set_airq(zdev); - if (rc) - return rc; - - return (msi_vecs == nvec) ? 0 : msi_vecs; -} - -void arch_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct zpci_dev *zdev = to_zpci(pdev); - struct msi_desc *msi; - int rc; - - /* Disable adapter interrupts */ - rc = zpci_clear_airq(zdev); - if (rc) - return; - - /* Release MSI interrupts */ - for_each_pci_msi_entry(msi, pdev) { - if (!msi->irq) - continue; - if (msi->msi_attrib.is_msix) - __pci_msix_desc_mask_irq(msi, 1); - else - __pci_msi_desc_mask_irq(msi, 1, 1); - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - } - - if (zdev->aisb != -1UL) { - zpci_aibv[zdev->aisb] = NULL; - airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); - zdev->aisb = -1UL; - } - if (zdev->aibv) { - airq_iv_release(zdev->aibv); - zdev->aibv = NULL; - } -} - #ifdef CONFIG_PCI_IOV static struct resource iov_res = { .name = "PCI IOV res", @@ -531,41 +350,6 @@ static void zpci_unmap_resources(struct pci_dev *pdev) } } -static struct airq_struct zpci_airq = { - .handler = zpci_irq_handler, - .isc = PCI_ISC, -}; - -static int __init zpci_irq_init(void) -{ - int rc; - - rc = register_adapter_interrupt(&zpci_airq); - if (rc) - goto out; - /* Set summary to 1 to be called every time for the ISC. 
*/ - *zpci_airq.lsi_ptr = 1; - - rc = -ENOMEM; - zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); - if (!zpci_aisb_iv) - goto out_airq; - - zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); - return 0; - -out_airq: - unregister_adapter_interrupt(&zpci_airq); -out: - return rc; -} - -static void zpci_irq_exit(void) -{ - airq_iv_release(zpci_aisb_iv); - unregister_adapter_interrupt(&zpci_airq); -} - static int zpci_alloc_iomap(struct zpci_dev *zdev) { unsigned long entry; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c new file mode 100644 index 0000000000000..3b129cd35b6e5 --- /dev/null +++ b/arch/s390/pci/pci_irq.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include + +#include +#include + +#define SIC_IRQ_MODE_ALL 0 +#define SIC_IRQ_MODE_SINGLE 1 + +static struct airq_iv *zpci_aisb_iv; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; + +/* Modify PCI: Register adapter interruptions */ +static int zpci_set_airq(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); + struct zpci_fib fib = {0}; + u8 status; + + fib.isc = PCI_ISC; + fib.sum = 1; /* enable summary notifications */ + fib.noi = airq_iv_end(zdev->aibv); + fib.aibv = (unsigned long) zdev->aibv->vector; + fib.aibvo = 0; /* each zdev has its own interrupt vector */ + fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib.aisbo = zdev->aisb & 63; + + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; +} + +/* Modify PCI: Unregister adapter interruptions */ +static int zpci_clear_airq(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); + struct zpci_fib fib = {0}; + u8 cc, status; + + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3 || (cc == 1 && status == 24)) + /* Function already gone or IRQs already deregistered. */ + cc = 0; + + return cc ? -EIO : 0; +} + +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = pci_msi_unmask_irq, + .irq_mask = pci_msi_mask_irq, +}; + +static void zpci_irq_handler(struct airq_struct *airq) +{ + unsigned long si, ai; + struct airq_iv *aibv; + int irqs_on = 0; + + inc_irq_stat(IRQIO_PCI); + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC)) + break; + si = 0; + continue; + } + + /* Scan the adapter interrupt vector for this device. 
*/ + aibv = zpci_aibv[si]; + for (ai = 0;;) { + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); + if (ai == -1UL) + break; + inc_irq_stat(IRQIO_MSI); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); + } + } +} + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct zpci_dev *zdev = to_zpci(pdev); + unsigned int hwirq, msi_vecs; + unsigned long aisb; + struct msi_desc *msi; + struct msi_msg msg; + int rc, irq; + + zdev->aisb = -1UL; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); + + /* Allocate adapter summary indicator bit */ + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + return -EIO; + zdev->aisb = aisb; + + /* Create adapter interrupt vector */ + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + return -ENOMEM; + + /* Wire up shortcut pointer */ + zpci_aibv[aisb] = zdev->aibv; + + /* Request MSI interrupts */ + hwirq = 0; + for_each_pci_msi_entry(msi, pdev) { + if (hwirq >= msi_vecs) + break; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq < 0) + return -ENOMEM; + rc = irq_set_msi_desc(irq, msi); + if (rc) + return rc; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + pci_write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; + } + + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + return rc; + + return (msi_vecs == nvec) ? 0 : msi_vecs; +} + +void arch_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + struct msi_desc *msi; + int rc; + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); + if (rc) + return; + + /* Release MSI interrupts */ + for_each_pci_msi_entry(msi, pdev) { + if (!msi->irq) + continue; + if (msi->msi_attrib.is_msix) + __pci_msix_desc_mask_irq(msi, 1); + else + __pci_msi_desc_mask_irq(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } + + if (zdev->aisb != -1UL) { + zpci_aibv[zdev->aisb] = NULL; + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); + zdev->aisb = -1UL; + } + if (zdev->aibv) { + airq_iv_release(zdev->aibv); + zdev->aibv = NULL; + } +} + +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; + +int __init zpci_irq_init(void) +{ + int rc; + + rc = register_adapter_interrupt(&zpci_airq); + if (rc) + goto out; + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; + + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; + + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + return 0; + +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: + return rc; +} + +void __init zpci_irq_exit(void) +{ + airq_iv_release(zpci_aisb_iv); + unregister_adapter_interrupt(&zpci_airq); +} -- GitLab From 0a9fddfaa8ea0f66564329ce89390c8dd38b2df0 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 12 Feb 2019 12:37:50 +0100 Subject: [PATCH 1656/1861] s390/sclp: detect DIRQ facility Detect the adapter CPU directed interruption facility. 
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp.h | 2 +- drivers/s390/char/sclp_early.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ef5d8fa921226..f577c5f6031ad 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -81,6 +81,7 @@ struct sclp_info { unsigned char has_diag318 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_g2 : 1; + unsigned char has_dirq : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 287382dc21c58..043f32ba37491 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -197,7 +197,7 @@ struct read_info_sccb { u32 hmfai; /* 124-127 */ u8 _pad_128[134 - 128]; /* 128-133 */ u8 byte_134; /* 134 */ - u8 _pad_135; /* 135 */ + u8 cpudirq; /* 135 */ u16 cbl; /* 136-137 */ u8 _pad_138[4096 - 138]; /* 138-4095 */ } __packed __aligned(PAGE_SIZE); diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index dae9de3d7c962..fdad2a980129d 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -95,6 +95,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) sclp.mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0; sclp.hmfai = sccb->hmfai; + sclp.has_dirq = !!(sccb->cpudirq & 0x80); } /* -- GitLab From 30e63ef2ef43f014bf2039bd57cc917780d6a44b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 28 Oct 2018 11:51:56 +0100 Subject: [PATCH 1657/1861] s390/airq: recognize directed interrupts Add an extra parameter for airq handlers to recognize floating vs. directed interrupts. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/airq.h | 2 +- arch/s390/kvm/interrupt.c | 2 +- arch/s390/pci/pci_irq.c | 2 +- drivers/s390/cio/airq.c | 2 +- drivers/s390/cio/cio.h | 2 +- drivers/s390/cio/qdio_thinint.c | 4 ++-- drivers/s390/crypto/ap_bus.c | 4 ++-- drivers/s390/virtio/virtio_ccw.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index fcf539efb32fb..91e78df365c76 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -14,7 +14,7 @@ struct airq_struct { struct hlist_node list; /* Handler queueing. 
*/ - void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + void (*handler)(struct airq_struct *airq, bool floating); u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ u8 lsi_mask; /* Local-Summary-Indicator mask */ u8 isc; /* Interrupt-subclass */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 82162867f378d..37503ae62486c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3194,7 +3194,7 @@ out: } EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister); -static void gib_alert_irq_handler(struct airq_struct *airq) +static void gib_alert_irq_handler(struct airq_struct *airq, bool floating) { inc_irq_stat(IRQIO_GAL); process_gib_alert_list(); diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 3b129cd35b6e5..55e22e3923761 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -56,7 +56,7 @@ static struct irq_chip zpci_irq_chip = { .irq_mask = pci_msi_mask_irq, }; -static void zpci_irq_handler(struct airq_struct *airq) +static void zpci_irq_handler(struct airq_struct *airq, bool floating) { unsigned long si, ai; struct airq_iv *aibv; diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index a45011e4529e7..e045e79f061c5 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -95,7 +95,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy) rcu_read_lock(); hlist_for_each_entry_rcu(airq, head, list) if ((*airq->lsi_ptr & airq->lsi_mask) != 0) - airq->handler(airq); + airq->handler(airq, !tpi_info->directed_irq); rcu_read_unlock(); return IRQ_HANDLED; diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 92eabbb5f18d4..06a91743335af 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -51,7 +51,7 @@ struct tpi_info { struct subchannel_id schid; u32 intparm; u32 adapter_IO:1; - u32 :1; + u32 directed_irq:1; u32 isc:3; u32 :27; u32 type:3; diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 07dea602205bd..28d59ac2204cc 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -40,7 +40,7 @@ static LIST_HEAD(tiq_list); static DEFINE_MUTEX(tiq_list_lock); /* Adapter interrupt definitions */ -static void tiqdio_thinint_handler(struct airq_struct *airq); +static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating); static struct airq_struct tiqdio_airq = { .handler = tiqdio_thinint_handler, @@ -179,7 +179,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) * tiqdio_thinint_handler - thin interrupt handler for qdio * @airq: pointer to adapter interrupt descriptor */ -static void tiqdio_thinint_handler(struct airq_struct *airq) +static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating) { u32 si_used = clear_shared_ind(); struct qdio_q *q; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 1546389d71dbc..cc30e4f07fffa 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -116,7 +116,7 @@ static int user_set_domain; static struct bus_type ap_bus_type; /* Adapter interrupt definitions */ -static void ap_interrupt_handler(struct airq_struct *airq); +static void ap_interrupt_handler(struct airq_struct *airq, bool floating); static int ap_airq_flag; @@ -393,7 +393,7 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) * ap_interrupt_handler() - Schedule ap_tasklet on interrupt * @airq: pointer to adapter interrupt descriptor */ -static void ap_interrupt_handler(struct airq_struct 
*airq) +static void ap_interrupt_handler(struct airq_struct *airq, bool floating) { inc_irq_stat(IRQIO_APB); if (!ap_suspend_flag) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 74c328321889b..991420caa4f20 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -182,7 +182,7 @@ static void drop_airq_indicator(struct virtqueue *vq, struct airq_info *info) write_unlock_irqrestore(&info->lock, flags); } -static void virtio_airq_handler(struct airq_struct *airq) +static void virtio_airq_handler(struct airq_struct *airq, bool floating) { struct airq_info *info = container_of(airq, struct airq_info, airq); unsigned long ai; -- GitLab From b1f548645cb587c14f5e751b9163939d0723885f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 14 Feb 2019 12:56:50 +0100 Subject: [PATCH 1658/1861] s390/pci: clarify interrupt vector usage Rename and clarify the usage of the interrupt bit vectors. Also change the array of the per-function bit vectors to be dynamically allocated. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_irq.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 55e22e3923761..0170db93be827 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -14,8 +14,15 @@ #define SIC_IRQ_MODE_ALL 0 #define SIC_IRQ_MODE_SINGLE 1 -static struct airq_iv *zpci_aisb_iv; -static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; +/* + * summary bit vector - one summary bit per function + */ +static struct airq_iv *zpci_sbv; + +/* + * interrupt bit vectors - one vector per function + */ +static struct airq_iv **zpci_ibv; /* Modify PCI: Register adapter interruptions */ static int zpci_set_airq(struct zpci_dev *zdev) @@ -29,7 +36,7 @@ static int zpci_set_airq(struct zpci_dev *zdev) fib.noi = airq_iv_end(zdev->aibv); fib.aibv = (unsigned long) zdev->aibv->vector; fib.aibvo = 0; /* each zdev has its own interrupt vector */ - fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8; fib.aisbo = zdev->aisb & 63; return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; @@ -65,7 +72,7 @@ static void zpci_irq_handler(struct airq_struct *airq, bool floating) inc_irq_stat(IRQIO_PCI); for (si = 0;;) { /* Scan adapter summary indicator bit vector */ - si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + si = airq_iv_scan(zpci_sbv, si, airq_iv_end(zpci_sbv)); if (si == -1UL) { if (irqs_on++) /* End of second scan with interrupts on. */ @@ -78,7 +85,7 @@ static void zpci_irq_handler(struct airq_struct *airq, bool floating) } /* Scan the adapter interrupt vector for this device. 
*/ - aibv = zpci_aibv[si]; + aibv = zpci_ibv[si]; for (ai = 0;;) { ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); if (ai == -1UL) @@ -106,7 +113,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); /* Allocate adapter summary indicator bit */ - aisb = airq_iv_alloc_bit(zpci_aisb_iv); + aisb = airq_iv_alloc_bit(zpci_sbv); if (aisb == -1UL) return -EIO; zdev->aisb = aisb; @@ -117,7 +124,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -ENOMEM; /* Wire up shortcut pointer */ - zpci_aibv[aisb] = zdev->aibv; + zpci_ibv[aisb] = zdev->aibv; /* Request MSI interrupts */ hwirq = 0; @@ -176,8 +183,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) } if (zdev->aisb != -1UL) { - zpci_aibv[zdev->aisb] = NULL; - airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); + zpci_ibv[zdev->aisb] = NULL; + airq_iv_free_bit(zpci_sbv, zdev->aisb); zdev->aisb = -1UL; } if (zdev->aibv) { @@ -202,13 +209,19 @@ int __init zpci_irq_init(void) *zpci_airq.lsi_ptr = 1; rc = -ENOMEM; - zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); - if (!zpci_aisb_iv) + zpci_ibv = kcalloc(ZPCI_NR_DEVICES, sizeof(*zpci_ibv), GFP_KERNEL); + if (!zpci_ibv) goto out_airq; + zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_sbv) + goto out_free; + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; +out_free: + kfree(zpci_ibv); out_airq: unregister_adapter_interrupt(&zpci_airq); out: @@ -217,6 +230,7 @@ out: void __init zpci_irq_exit(void) { - airq_iv_release(zpci_aisb_iv); + airq_iv_release(zpci_sbv); + kfree(zpci_ibv); unregister_adapter_interrupt(&zpci_airq); } -- GitLab From 414cbd1e3d14ec0e60666a0fb9d8ae2d77eb7c63 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 27 Feb 2019 13:56:08 +0100 Subject: [PATCH 1659/1861] s390/airq: provide cacheline aligned ivs Provide the ability to create cacheline-aligned interrupt vectors. These will be used for per-CPU interrupt vectors.
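For illustration, a caller that wants such a vector passes the new AIRQ_IV_CACHELINE flag and keeps the bit count within a single cache line, which airq_iv_create() enforces. This sketch is not part of the patch; it only shows the intended use:

	struct airq_iv *iv;

	/* One interrupt bit vector backed by a single cache line. */
	iv = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
			    AIRQ_IV_DATA | AIRQ_IV_CACHELINE);
	if (!iv)
		return -ENOMEM;
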
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/airq.h | 10 +++++---- drivers/s390/cio/airq.c | 39 +++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 91e78df365c76..c10d2ee2dfda4 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -35,13 +35,15 @@ struct airq_iv { unsigned int *data; /* 32 bit value associated with each bit */ unsigned long bits; /* Number of bits in the vector */ unsigned long end; /* Number of highest allocated bit + 1 */ + unsigned long flags; /* Allocation flags */ spinlock_t lock; /* Lock to protect alloc & free */ }; -#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ -#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ -#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ -#define AIRQ_IV_DATA 8 /* Allocate the data array */ +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ +#define AIRQ_IV_CACHELINE 16 /* Cacheline alignment for the vector */ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); void airq_iv_release(struct airq_iv *iv); diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index e045e79f061c5..4534afc635913 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -27,6 +27,8 @@ static DEFINE_SPINLOCK(airq_lists_lock); static struct hlist_head airq_lists[MAX_ISC+1]; +static struct kmem_cache *airq_iv_cache; + /** * register_adapter_interrupt() - register adapter interrupt handler * @airq: pointer to adapter interrupt descriptor @@ -129,10 +131,21 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags) if (!iv) goto out; iv->bits = bits; + iv->flags = flags; size = BITS_TO_LONGS(bits) * sizeof(unsigned long); - iv->vector = kzalloc(size, GFP_KERNEL); - if (!iv->vector) - goto out_free; + + if (flags & AIRQ_IV_CACHELINE) { + if ((cache_line_size() * BITS_PER_BYTE) < bits) + goto out_free; + + iv->vector = kmem_cache_zalloc(airq_iv_cache, GFP_KERNEL); + if (!iv->vector) + goto out_free; + } else { + iv->vector = kzalloc(size, GFP_KERNEL); + if (!iv->vector) + goto out_free; + } if (flags & AIRQ_IV_ALLOC) { iv->avail = kmalloc(size, GFP_KERNEL); if (!iv->avail) @@ -165,7 +178,10 @@ out_free: kfree(iv->ptr); kfree(iv->bitlock); kfree(iv->avail); - kfree(iv->vector); + if (iv->flags & AIRQ_IV_CACHELINE) + kmem_cache_free(airq_iv_cache, iv->vector); + else + kfree(iv->vector); kfree(iv); out: return NULL; @@ -181,7 +197,10 @@ void airq_iv_release(struct airq_iv *iv) kfree(iv->data); kfree(iv->ptr); kfree(iv->bitlock); - kfree(iv->vector); + if (iv->flags & AIRQ_IV_CACHELINE) + kmem_cache_free(airq_iv_cache, iv->vector); + else + kfree(iv->vector); kfree(iv->avail); kfree(iv); } @@ -275,3 +294,13 @@ unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, return bit; } EXPORT_SYMBOL(airq_iv_scan); + +static int __init airq_init(void) +{ + airq_iv_cache = kmem_cache_create("airq_iv_cache", cache_line_size(), + cache_line_size(), 0, NULL); + if (!airq_iv_cache) + return -ENOMEM; + return 0; +} +subsys_initcall(airq_init); -- GitLab From e979ce7bced2ee019b5b1a040295484bd7f23680 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 27 Sep 2018 13:57:12 +0200 Subject: [PATCH 1660/1861] s390/pci: provide support for CPU directed 
interrupts Up until now all interrupts on s390 have been floating. For MSI interrupts we've used a global summary bit vector (with a bit for each function) and a per-function interrupt bit vector (with a bit per MSI). This patch introduces a new IRQ delivery mode: CPU directed interrupts. In this new mode a per-CPU interrupt bit vector is used (with a bit per MSI per function). Further it is now possible to direct an IRQ to a specific CPU so we can finally support IRQ affinity. If an interrupt can't be delivered because the appointed CPU is occupied by a hypervisor the interrupt is delivered floating. For this a global summary bit vector is used (with a bit per CPU). Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 2 + arch/s390/include/asm/pci_clp.h | 6 +- arch/s390/include/asm/pci_insn.h | 74 ++++++- arch/s390/pci/pci_insn.c | 10 +- arch/s390/pci/pci_irq.c | 339 ++++++++++++++++++++++++++----- 5 files changed, 368 insertions(+), 63 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 71517451750b4..a285754d07426 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -115,6 +115,8 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ unsigned int max_msi; /* maximum number of MSI's */ + unsigned int msi_first_bit; + unsigned int msi_nr_irqs; struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned long aisb; /* number of the summary bit */ diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index b3b31b31f0d33..d2d824a91e66f 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -118,7 +118,11 @@ struct clp_rsp_query_pci_grp { u8 refresh : 1; /* TLB refresh mode */ u16 reserved2; u16 mui; - u64 reserved3; + u16 : 16; + u16 maxfaal; + u16 : 4; + u16 dnoi : 12; + u16 maxcpu; u64 dasm; /* dma address space mask */ u64 msia; /* MSI address */ u64 reserved4; diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index ba22a6ea51a14..ab10310705033 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -38,6 +38,8 @@ #define ZPCI_MOD_FC_RESET_ERROR 7 #define ZPCI_MOD_FC_RESET_BLOCK 9 #define ZPCI_MOD_FC_SET_MEASURE 10 +#define ZPCI_MOD_FC_REG_INT_D 16 +#define ZPCI_MOD_FC_DEREG_INT_D 17 /* FIB function controls */ #define ZPCI_FIB_FC_ENABLED 0x80 @@ -51,16 +53,7 @@ #define ZPCI_FIB_FC_LS_BLOCKED 0x20 #define ZPCI_FIB_FC_DMAAS_REG 0x10 -/* Function Information Block */ -struct zpci_fib { - u32 fmt : 8; /* format */ - u32 : 24; - u32 : 32; - u8 fc; /* function controls */ - u64 : 56; - u64 pba; /* PCI base address */ - u64 pal; /* PCI address limit */ - u64 iota; /* I/O Translation Anchor */ +struct zpci_fib_fmt0 { u32 : 1; u32 isc : 3; /* Interrupt subclass */ u32 noi : 12; /* Number of interrupts */ @@ -72,16 +65,75 @@ struct zpci_fib { u32 : 32; u64 aibv; /* Adapter int bit vector address */ u64 aisb; /* Adapter int summary bit address */ +}; + +struct zpci_fib_fmt1 { + u32 : 4; + u32 noi : 12; + u32 : 16; + u32 dibvo : 16; + u32 : 16; + u64 : 64; + u64 : 64; +}; + +/* Function Information Block */ +struct zpci_fib { + u32 fmt : 8; /* format */ + u32 : 24; + u32 : 32; + u8 fc; /* function controls */ + u64 : 56; + u64 pba; /* PCI base address */ + u64 pal; /* PCI address limit */ + u64 iota; /* I/O Translation Anchor */ + union { + struct zpci_fib_fmt0 fmt0; + struct zpci_fib_fmt1 fmt1; + }; u64 fmb_addr; /* Function measurement block address and key 
*/ u32 : 32; u32 gd; } __packed __aligned(8); +/* directed interruption information block */ +struct zpci_diib { + u32 : 1; + u32 isc : 3; + u32 : 28; + u16 : 16; + u16 nr_cpus; + u64 disb_addr; + u64 : 64; + u64 : 64; +} __packed __aligned(8); + +/* cpu directed interruption information block */ +struct zpci_cdiib { + u64 : 64; + u64 dibv_addr; + u64 : 64; + u64 : 64; + u64 : 64; +} __packed __aligned(8); + +union zpci_sic_iib { + struct zpci_diib diib; + struct zpci_cdiib cdiib; +}; + u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status); int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int zpci_load(u64 *data, u64 req, u64 offset); int zpci_store(u64 data, u64 req, u64 offset); int zpci_store_block(const u64 *data, u64 req, u64 offset); -int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib); + +static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc) +{ + union zpci_sic_iib iib = {{0}}; + + return __zpci_set_irq_ctrl(ctl, isc, &iib); +} #endif diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index f069929e82114..4b2ca068d40ef 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -96,13 +96,15 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) +int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib) { if (!test_facility(72)) return -EIO; - asm volatile ( - " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" - : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); + + asm volatile( + ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n" + : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib)); + return 0; } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 0170db93be827..4bfd902f27f44 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -7,20 +7,31 @@ #include #include #include +#include #include #include +static enum {FLOATING, DIRECTED} irq_delivery; + #define SIC_IRQ_MODE_ALL 0 #define SIC_IRQ_MODE_SINGLE 1 +#define SIC_IRQ_MODE_DIRECT 4 +#define SIC_IRQ_MODE_D_ALL 16 +#define SIC_IRQ_MODE_D_SINGLE 17 +#define SIC_IRQ_MODE_SET_CPU 18 /* - * summary bit vector - one summary bit per function + * summary bit vector + * FLOATING - summary bit per function + * DIRECTED - summary bit per cpu (only used in fallback path) */ static struct airq_iv *zpci_sbv; /* - * interrupt bit vectors - one vector per function + * interrupt bit vectors + * FLOATING - interrupt bit vector per function + * DIRECTED - interrupt bit vector per cpu */ static struct airq_iv **zpci_ibv; @@ -31,13 +42,13 @@ static int zpci_set_airq(struct zpci_dev *zdev) struct zpci_fib fib = {0}; u8 status; - fib.isc = PCI_ISC; - fib.sum = 1; /* enable summary notifications */ - fib.noi = airq_iv_end(zdev->aibv); - fib.aibv = (unsigned long) zdev->aibv->vector; - fib.aibvo = 0; /* each zdev has its own interrupt vector */ - fib.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8; - fib.aisbo = zdev->aisb & 63; + fib.fmt0.isc = PCI_ISC; + fib.fmt0.sum = 1; /* enable summary notifications */ + fib.fmt0.noi = airq_iv_end(zdev->aibv); + fib.fmt0.aibv = (unsigned long) zdev->aibv->vector; + fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */ + fib.fmt0.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8; + fib.fmt0.aisbo = zdev->aisb & 63; return zpci_mod_fc(req, &fib, &status) ? 
-EIO : 0; } @@ -57,13 +68,134 @@ static int zpci_clear_airq(struct zpci_dev *zdev) return cc ? -EIO : 0; } +/* Modify PCI: Register CPU directed interruptions */ +static int zpci_set_directed_irq(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT_D); + struct zpci_fib fib = {0}; + u8 status; + + fib.fmt = 1; + fib.fmt1.noi = zdev->msi_nr_irqs; + fib.fmt1.dibvo = zdev->msi_first_bit; + + return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; +} + +/* Modify PCI: Unregister CPU directed interruptions */ +static int zpci_clear_directed_irq(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT_D); + struct zpci_fib fib = {0}; + u8 cc, status; + + fib.fmt = 1; + cc = zpci_mod_fc(req, &fib, &status); + if (cc == 3 || (cc == 1 && status == 24)) + /* Function already gone or IRQs already deregistered. */ + cc = 0; + + return cc ? -EIO : 0; +} + +static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest, + bool force) +{ + struct msi_desc *entry = irq_get_msi_desc(data->irq); + struct msi_msg msg = entry->msg; + + msg.address_lo &= 0xff0000ff; + msg.address_lo |= (cpumask_first(dest) << 8); + pci_write_msi_msg(data->irq, &msg); + + return IRQ_SET_MASK_OK; +} + static struct irq_chip zpci_irq_chip = { .name = "zPCI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, + .irq_set_affinity = zpci_set_irq_affinity, }; -static void zpci_irq_handler(struct airq_struct *airq, bool floating) +static void zpci_handle_cpu_local_irq(bool rescan) +{ + struct airq_iv *dibv = zpci_ibv[smp_processor_id()]; + unsigned long bit; + int irqs_on = 0; + + for (bit = 0;;) { + /* Scan the directed IRQ bit vector */ + bit = airq_iv_scan(dibv, bit, airq_iv_end(dibv)); + if (bit == -1UL) { + if (!rescan || irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC)) + break; + bit = 0; + continue; + } + inc_irq_stat(IRQIO_MSI); + generic_handle_irq(airq_iv_get_data(dibv, bit)); + } +} + +struct cpu_irq_data { + call_single_data_t csd; + atomic_t scheduled; +}; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_irq_data, irq_data); + +static void zpci_handle_remote_irq(void *data) +{ + atomic_t *scheduled = data; + + do { + zpci_handle_cpu_local_irq(false); + } while (atomic_dec_return(scheduled)); +} + +static void zpci_handle_fallback_irq(void) +{ + struct cpu_irq_data *cpu_data; + unsigned long cpu; + int irqs_on = 0; + + for (cpu = 0;;) { + cpu = airq_iv_scan(zpci_sbv, cpu, airq_iv_end(zpci_sbv)); + if (cpu == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. 
*/ + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC)) + break; + cpu = 0; + continue; + } + cpu_data = &per_cpu(irq_data, cpu); + if (atomic_inc_return(&cpu_data->scheduled) > 1) + continue; + + cpu_data->csd.func = zpci_handle_remote_irq; + cpu_data->csd.info = &cpu_data->scheduled; + cpu_data->csd.flags = 0; + smp_call_function_single_async(cpu, &cpu_data->csd); + } +} + +static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating) +{ + inc_irq_stat(IRQIO_PCI); + if (floating) + zpci_handle_fallback_irq(); + else + zpci_handle_cpu_local_irq(true); +} + +static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating) { unsigned long si, ai; struct airq_iv *aibv; @@ -78,7 +210,7 @@ static void zpci_irq_handler(struct airq_struct *airq, bool floating) /* End of second scan with interrupts on. */ break; /* First scan complete, reenable interrupts. */ - if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC)) + if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC)) break; si = 0; continue; @@ -101,54 +233,79 @@ static void zpci_irq_handler(struct airq_struct *airq, bool floating) int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs; - unsigned long aisb; + unsigned int hwirq, msi_vecs, cpu; + unsigned long bit; struct msi_desc *msi; struct msi_msg msg; int rc, irq; zdev->aisb = -1UL; + zdev->msi_first_bit = -1U; if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); - /* Allocate adapter summary indicator bit */ - aisb = airq_iv_alloc_bit(zpci_sbv); - if (aisb == -1UL) - return -EIO; - zdev->aisb = aisb; - - /* Create adapter interrupt vector */ - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); - if (!zdev->aibv) - return -ENOMEM; + if (irq_delivery == DIRECTED) { + /* Allocate cpu vector bits */ + bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); + if (bit == -1UL) + return -EIO; + } else { + /* Allocate adapter summary indicator bit */ + bit = airq_iv_alloc_bit(zpci_sbv); + if (bit == -1UL) + return -EIO; + zdev->aisb = bit; + + /* Create adapter interrupt vector */ + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + return -ENOMEM; - /* Wire up shortcut pointer */ - zpci_ibv[aisb] = zdev->aibv; + /* Wire up shortcut pointer */ + zpci_ibv[bit] = zdev->aibv; + /* Each function has its own interrupt vector */ + bit = 0; + } /* Request MSI interrupts */ - hwirq = 0; + hwirq = bit; for_each_pci_msi_entry(msi, pdev) { - if (hwirq >= msi_vecs) + rc = -EIO; + if (hwirq - bit >= msi_vecs) break; - irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity); if (irq < 0) return -ENOMEM; rc = irq_set_msi_desc(irq, msi); if (rc) return rc; irq_set_chip_and_handler(irq, &zpci_irq_chip, - handle_simple_irq); + handle_percpu_irq); msg.data = hwirq; - msg.address_lo = zdev->msi_addr & 0xffffffff; + if (irq_delivery == DIRECTED) { + msg.address_lo = zdev->msi_addr & 0xff0000ff; + msg.address_lo |= msi->affinity ? 
+ (cpumask_first(&msi->affinity->mask) << 8) : 0; + for_each_possible_cpu(cpu) { + airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); + } + } else { + msg.address_lo = zdev->msi_addr & 0xffffffff; + airq_iv_set_data(zdev->aibv, hwirq, irq); + } msg.address_hi = zdev->msi_addr >> 32; pci_write_msi_msg(irq, &msg); - airq_iv_set_data(zdev->aibv, hwirq, irq); hwirq++; } - /* Enable adapter interrupts */ - rc = zpci_set_airq(zdev); + zdev->msi_first_bit = bit; + zdev->msi_nr_irqs = msi_vecs; + + if (irq_delivery == DIRECTED) + rc = zpci_set_directed_irq(zdev); + else + rc = zpci_set_airq(zdev); if (rc) return rc; @@ -161,8 +318,11 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) struct msi_desc *msi; int rc; - /* Disable adapter interrupts */ - rc = zpci_clear_airq(zdev); + /* Disable interrupts */ + if (irq_delivery == DIRECTED) + rc = zpci_clear_directed_irq(zdev); + else + rc = zpci_clear_airq(zdev); if (rc) return; @@ -191,37 +351,114 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) airq_iv_release(zdev->aibv); zdev->aibv = NULL; } + + if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U) + airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs); } static struct airq_struct zpci_airq = { - .handler = zpci_irq_handler, + .handler = zpci_floating_irq_handler, .isc = PCI_ISC, }; -int __init zpci_irq_init(void) +static void __init cpu_enable_directed_irq(void *unused) { - int rc; + union zpci_sic_iib iib = {{0}}; - rc = register_adapter_interrupt(&zpci_airq); - if (rc) - goto out; - /* Set summary to 1 to be called every time for the ISC. */ - *zpci_airq.lsi_ptr = 1; + iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector; + + __zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib); + zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC); +} + +static int __init zpci_directed_irq_init(void) +{ + union zpci_sic_iib iib = {{0}}; + unsigned int cpu; + + zpci_sbv = airq_iv_create(num_possible_cpus(), 0); + if (!zpci_sbv) + return -ENOMEM; + + iib.diib.isc = PCI_ISC; + iib.diib.nr_cpus = num_possible_cpus(); + iib.diib.disb_addr = (u64) zpci_sbv->vector; + __zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib); + + zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv), + GFP_KERNEL); + if (!zpci_ibv) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + /* + * Per CPU IRQ vectors look the same but bit-allocation + * is only done on the first vector. + */ + zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE, + AIRQ_IV_DATA | + AIRQ_IV_CACHELINE | + (!cpu ? AIRQ_IV_ALLOC : 0)); + if (!zpci_ibv[cpu]) + return -ENOMEM; + } + on_each_cpu(cpu_enable_directed_irq, NULL, 1); - rc = -ENOMEM; + zpci_irq_chip.irq_set_affinity = zpci_set_irq_affinity; + + return 0; +} + +static int __init zpci_floating_irq_init(void) +{ zpci_ibv = kcalloc(ZPCI_NR_DEVICES, sizeof(*zpci_ibv), GFP_KERNEL); if (!zpci_ibv) - goto out_airq; + return -ENOMEM; zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); if (!zpci_sbv) goto out_free; - zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_free: kfree(zpci_ibv); + return -ENOMEM; +} + +int __init zpci_irq_init(void) +{ + int rc; + + irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING; + if (irq_delivery == DIRECTED) + zpci_airq.handler = zpci_directed_irq_handler; + + rc = register_adapter_interrupt(&zpci_airq); + if (rc) + goto out; + /* Set summary to 1 to be called every time for the ISC. 
*/ + *zpci_airq.lsi_ptr = 1; + + switch (irq_delivery) { + case FLOATING: + rc = zpci_floating_irq_init(); + break; + case DIRECTED: + rc = zpci_directed_irq_init(); + break; + } + + if (rc) + goto out_airq; + + /* + * Enable floating IRQs (with suppression after one IRQ). When using + * directed IRQs this enables the fallback path. + */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC); + + return 0; out_airq: unregister_adapter_interrupt(&zpci_airq); out: @@ -230,7 +467,15 @@ out: void __init zpci_irq_exit(void) { - airq_iv_release(zpci_sbv); + unsigned int cpu; + + if (irq_delivery == DIRECTED) { + for_each_possible_cpu(cpu) { + airq_iv_release(zpci_ibv[cpu]); + } + } kfree(zpci_ibv); + if (zpci_sbv) + airq_iv_release(zpci_sbv); unregister_adapter_interrupt(&zpci_airq); } -- GitLab From 914b7dd07ee8713c69c31ddb3e19a76852a846ac Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 14 Feb 2017 18:13:09 +0100 Subject: [PATCH 1661/1861] s390: show statistics for MSI IRQs Improve /proc/interrupts on s390 to show statistics for individual MSI interrupts. Signed-off-by: Sebastian Ott Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 6 ++--- arch/s390/kernel/irq.c | 47 ++++++++++++++++++++++++++++++------- arch/s390/pci/pci_irq.c | 2 +- drivers/s390/cio/cio.c | 2 +- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index afaf5e3c57fd8..d159439440542 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -47,7 +47,6 @@ enum interruption_class { IRQEXT_CMC, IRQEXT_FTP, IRQIO_CIO, - IRQIO_QAI, IRQIO_DAS, IRQIO_C15, IRQIO_C70, @@ -55,12 +54,13 @@ enum interruption_class { IRQIO_VMR, IRQIO_LCS, IRQIO_CTC, - IRQIO_APB, IRQIO_ADM, IRQIO_CSC, + IRQIO_VIR, + IRQIO_QAI, + IRQIO_APB, IRQIO_PCI, IRQIO_MSI, - IRQIO_VIR, IRQIO_VAI, IRQIO_GAL, NMI_NMI, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0cd5a5f96729d..f586f94d39476 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -73,7 +73,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, - {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"}, {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, @@ -81,14 +80,15 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, - {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, - {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" }, - {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, - {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, - {.irq = IRQIO_GAL, .name = "GAL", .desc = "[I/O] GIB Alert"}, + {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"}, + {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"}, 
+ {.irq = IRQIO_PCI, .name = "PCI", .desc = "[AIO] PCI Interrupt"}, + {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"}, + {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"}, + {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"}, {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; @@ -116,6 +116,34 @@ void do_IRQ(struct pt_regs *regs, int irq) set_irq_regs(old_regs); } +static void show_msi_interrupt(struct seq_file *p, int irq) +{ + struct irq_desc *desc; + unsigned long flags; + int cpu; + + irq_lock_sparse(); + desc = irq_to_desc(irq); + if (!desc) + goto out; + + raw_spin_lock_irqsave(&desc->lock, flags); + seq_printf(p, "%3d: ", irq); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + + if (desc->irq_data.chip) + seq_printf(p, " %8s", desc->irq_data.chip->name); + + if (desc->action) + seq_printf(p, " %s", desc->action->name); + + seq_putc(p, '\n'); + raw_spin_unlock_irqrestore(&desc->lock, flags); +out: + irq_unlock_sparse(); +} + /* * show_interrupts is needed by /proc/interrupts. */ @@ -128,7 +156,7 @@ int show_interrupts(struct seq_file *p, void *v) if (index == 0) { seq_puts(p, " "); for_each_online_cpu(cpu) - seq_printf(p, "CPU%d ", cpu); + seq_printf(p, "CPU%-8d", cpu); seq_putc(p, '\n'); } if (index < NR_IRQS_BASE) { @@ -139,9 +167,10 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); goto out; } - if (index > NR_IRQS_BASE) + if (index < nr_irqs) { + show_msi_interrupt(p, index); goto out; - + } for (index = 0; index < NR_ARCH_IRQS; index++) { seq_printf(p, "%s: ", irqclass_sub_desc[index].name); irq = irqclass_sub_desc[index].irq; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 4bfd902f27f44..e7e3eab9a2b8d 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -112,7 +112,7 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de } static struct irq_chip zpci_irq_chip = { - .name = "zPCI", + .name = "PCI-MSI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, .irq_set_affinity = zpci_set_irq_affinity, diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index de744ca158fdf..18f5458f90e8f 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -564,7 +564,7 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy) } static struct irqaction io_interrupt = { - .name = "IO", + .name = "I/O", .handler = do_cio_interrupt, }; -- GitLab From 07e3ec3acb80726f60b7ab924b1b0f1498148b56 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 22 Nov 2018 14:08:33 +0100 Subject: [PATCH 1662/1861] s390/pci: gather statistics for floating vs directed irqs Gather statistics to distinguish floating and directed interrupts. 
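For illustration, a handler built on the bool parameter introduced earlier in this series accounts the two delivery paths separately. The handler name below is hypothetical; the accounting mirrors the pci_irq.c hunk that follows:

	static void sample_irq_handler(struct airq_struct *airq, bool floating)
	{
		if (floating)
			inc_irq_stat(IRQIO_PCF);	/* floating fallback path */
		else
			inc_irq_stat(IRQIO_PCD);	/* CPU directed delivery */
		/* dispatch via the per-CPU or per-function bit vectors */
	}
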
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 3 ++- arch/s390/kernel/irq.c | 3 ++- arch/s390/pci/pci_irq.c | 10 ++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d159439440542..9f75d67b8c207 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -59,7 +59,8 @@ enum interruption_class { IRQIO_VIR, IRQIO_QAI, IRQIO_APB, - IRQIO_PCI, + IRQIO_PCF, + IRQIO_PCD, IRQIO_MSI, IRQIO_VAI, IRQIO_GAL, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f586f94d39476..150964f911833 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -85,7 +85,8 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"}, {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"}, - {.irq = IRQIO_PCI, .name = "PCI", .desc = "[AIO] PCI Interrupt"}, + {.irq = IRQIO_PCF, .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"}, + {.irq = IRQIO_PCD, .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"}, {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"}, {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"}, {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"}, diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index e7e3eab9a2b8d..c73ab855a2ca3 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -188,11 +188,13 @@ static void zpci_handle_fallback_irq(void) static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating) { - inc_irq_stat(IRQIO_PCI); - if (floating) + if (floating) { + inc_irq_stat(IRQIO_PCF); zpci_handle_fallback_irq(); - else + } else { + inc_irq_stat(IRQIO_PCD); zpci_handle_cpu_local_irq(true); + } } static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating) @@ -201,7 +203,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating) struct airq_iv *aibv; int irqs_on = 0; - inc_irq_stat(IRQIO_PCI); + inc_irq_stat(IRQIO_PCF); for (si = 0;;) { /* Scan adapter summary indicator bit vector */ si = airq_iv_scan(zpci_sbv, si, airq_iv_end(zpci_sbv)); -- GitLab From fbfe07d440f2c55070a0358f66560bb4f9fb92e7 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 26 Feb 2019 16:07:32 +0100 Subject: [PATCH 1663/1861] s390/pci: add parameter to force floating irqs Provide a kernel parameter to force the usage of floating interrupts. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- Documentation/admin-guide/kernel-parameters.txt | 1 + arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 5 +++++ arch/s390/pci/pci_irq.c | 3 +++ 4 files changed, 10 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b8ee90bb6447..abc1b6e305c4a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3394,6 +3394,7 @@ bridges without forcing it upstream. Note: this removes isolation between devices and may put more devices in an IOMMU group. + force_floating [S390] Force usage of floating interrupts. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. 
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index a285754d07426..328013219aec5 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -163,6 +163,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev) } extern const struct attribute_group *zpci_attr_groups[]; +extern unsigned int s390_pci_force_floating __initdata; /* ----------------------------------------------------------------------------- Prototypes diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 716e773af7880..db096fd16f464 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -741,6 +741,7 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe __initdata = 1; +unsigned int s390_pci_force_floating __initdata; static unsigned int s390_pci_initialized; char * __init pcibios_setup(char *str) @@ -749,6 +750,10 @@ char * __init pcibios_setup(char *str) s390_pci_probe = 0; return NULL; } + if (!strcmp(str, "force_floating")) { + s390_pci_force_floating = 1; + return NULL; + } return str; } diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index c73ab855a2ca3..d80616ae8dd8a 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -433,6 +433,9 @@ int __init zpci_irq_init(void) int rc; irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING; + if (s390_pci_force_floating) + irq_delivery = FLOATING; + if (irq_delivery == DIRECTED) zpci_airq.handler = zpci_directed_irq_handler; -- GitLab From 81deca12c202aa240a28f561a161ac3387a985db Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 14 Apr 2019 16:25:54 +0200 Subject: [PATCH 1664/1861] s390/pci: move io address mapping code to pci_insn.c This is a preparation patch for usage of new pci instructions. No functional change. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_insn.h | 8 ++++-- arch/s390/include/asm/pci_io.h | 49 +++++++++++++------------------- arch/s390/pci/pci.c | 4 +-- arch/s390/pci/pci_insn.c | 38 ++++++++++++++++++++++--- drivers/s390/net/ism.h | 2 +- 5 files changed, 61 insertions(+), 40 deletions(-) diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index ab10310705033..71f19604ec61d 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -124,9 +124,11 @@ union zpci_sic_iib { u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status); int zpci_refresh_trans(u64 fn, u64 addr, u64 range); -int zpci_load(u64 *data, u64 req, u64 offset); -int zpci_store(u64 data, u64 req, u64 offset); -int zpci_store_block(const u64 *data, u64 req, u64 offset); +int __zpci_load(u64 *data, u64 req, u64 offset); +int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len); +int __zpci_store(u64 data, u64 req, u64 offset); +int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len); +int __zpci_store_block(const u64 *data, u64 req, u64 offset); int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib); static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc) diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index cbb9cb9c65476..cd060b5dd8fdd 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -37,12 +37,10 @@ extern struct zpci_iomap_entry *zpci_iomap_start; #define zpci_read(LENGTH, RETTYPE) \ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ { \ - struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \ - u64 req = 
ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data; \ int rc; \ \ - rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ + rc = zpci_load(&data, addr, LENGTH); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -52,11 +50,9 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ static inline void zpci_write_##VALTYPE(VALTYPE val, \ const volatile void __iomem *addr) \ { \ - struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \ - u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - zpci_store(data, req, ZPCI_OFFSET(addr)); \ + zpci_store(addr, data, LENGTH); \ } zpci_read(8, u64) @@ -68,36 +64,38 @@ zpci_write(4, u32) zpci_write(2, u16) zpci_write(1, u8) -static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len) +static inline int zpci_write_single(volatile void __iomem *dst, const void *src, + unsigned long len) { u64 val; switch (len) { case 1: - val = (u64) *((u8 *) data); + val = (u64) *((u8 *) src); break; case 2: - val = (u64) *((u16 *) data); + val = (u64) *((u16 *) src); break; case 4: - val = (u64) *((u32 *) data); + val = (u64) *((u32 *) src); break; case 8: - val = (u64) *((u64 *) data); + val = (u64) *((u64 *) src); break; default: val = 0; /* let FW report error */ break; } - return zpci_store(val, req, offset); + return zpci_store(dst, val, len); } -static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) +static inline int zpci_read_single(void *dst, const volatile void __iomem *src, + unsigned long len) { u64 data; int cc; - cc = zpci_load(&data, req, offset); + cc = zpci_load(&data, src, len); if (cc) goto out; @@ -119,10 +117,8 @@ out: return cc; } -static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) -{ - return zpci_store_block(data, req, offset); -} +int zpci_write_block(volatile void __iomem *dst, const void *src, + unsigned long len); static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) { @@ -140,18 +136,15 @@ static inline int zpci_memcpy_fromio(void *dst, const volatile void __iomem *src, unsigned long n) { - struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)]; - u64 req, offset = ZPCI_OFFSET(src); int size, rc = 0; while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, (u64) dst, n, 8); - req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); - rc = zpci_read_single(req, dst, offset, size); + rc = zpci_read_single(dst, src, size); if (rc) break; - offset += size; + src += size; dst += size; n -= size; } @@ -161,8 +154,6 @@ static inline int zpci_memcpy_fromio(void *dst, static inline int zpci_memcpy_toio(volatile void __iomem *dst, const void *src, unsigned long n) { - struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)]; - u64 req, offset = ZPCI_OFFSET(dst); int size, rc = 0; if (!src) @@ -171,16 +162,14 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, (u64) src, n, 128); - req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); - if (size > 8) /* main path */ - rc = zpci_write_block(req, src, offset); + rc = zpci_write_block(dst, src, size); else - rc = zpci_write_single(req, src, offset, size); + rc = zpci_write_single(dst, src, size); if (rc) break; - offset += size; src += size; + dst += size; n -= size; } return rc; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index db096fd16f464..89d15a7f2e9ae 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -188,7 
+188,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = zpci_load(&data, req, offset); + rc = __zpci_load(&data, req, offset); if (!rc) { data = le64_to_cpu((__force __le64) data); data >>= (8 - len) * 8; @@ -206,7 +206,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data <<= (8 - len) * 8; data = (__force u64) cpu_to_le64(data); - rc = zpci_store(data, req, offset); + rc = __zpci_store(data, req, offset); return rc; } diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 4b2ca068d40ef..6dbd13bb7c1c9 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ @@ -142,7 +143,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int zpci_load(u64 *data, u64 req, u64 offset) +int __zpci_load(u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -158,6 +159,15 @@ int zpci_load(u64 *data, u64 req, u64 offset) return (cc > 0) ? -EIO : cc; } +EXPORT_SYMBOL_GPL(__zpci_load); + +int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); + + return __zpci_load(data, req, ZPCI_OFFSET(addr)); +} EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ @@ -180,7 +190,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) return cc; } -int zpci_store(u64 data, u64 req, u64 offset) +int __zpci_store(u64 data, u64 req, u64 offset) { u8 status; int cc; @@ -196,6 +206,15 @@ int zpci_store(u64 data, u64 req, u64 offset) return (cc > 0) ? -EIO : cc; } +EXPORT_SYMBOL_GPL(__zpci_store); + +int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); + + return __zpci_store(data, req, ZPCI_OFFSET(addr)); +} EXPORT_SYMBOL_GPL(zpci_store); /* PCI Store Block */ @@ -216,7 +235,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int zpci_store_block(const u64 *data, u64 req, u64 offset) +int __zpci_store_block(const u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -232,4 +251,15 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset) return (cc > 0) ? 
-EIO : cc; } -EXPORT_SYMBOL_GPL(zpci_store_block); +EXPORT_SYMBOL_GPL(__zpci_store_block); + +int zpci_write_block(volatile void __iomem *dst, + const void *src, unsigned long len) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)]; + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); + u64 offset = ZPCI_OFFSET(dst); + + return __zpci_store_block(src, req, offset); +} +EXPORT_SYMBOL_GPL(zpci_write_block); diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h index 0aab908173267..e8bf3d38af536 100644 --- a/drivers/s390/net/ism.h +++ b/drivers/s390/net/ism.h @@ -215,7 +215,7 @@ static inline int __ism_move(struct ism_dev *ism, u64 dmb_req, void *data, struct zpci_dev *zdev = to_zpci(ism->pdev); u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, size); - return zpci_write_block(req, data, dmb_req); + return __zpci_store_block(data, req, dmb_req); } #endif /* S390_ISM_H */ -- GitLab From c475f1770a5e062652a6a877afcbd4e22b18039f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Apr 2019 11:57:46 +0200 Subject: [PATCH 1665/1861] s390/ism: move oddities of device IO to wrapper function ISM devices are special in how they access PCI memory space. Provide wrappers for handling commands to the device. No functional change. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/net/ism.h | 27 +++++++++++++++++++++++++-- drivers/s390/net/ism_drv.c | 20 ++++++-------------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h index e8bf3d38af536..66eac2b9704d5 100644 --- a/drivers/s390/net/ism.h +++ b/drivers/s390/net/ism.h @@ -6,6 +6,7 @@ #include #include #include +#include #define UTIL_STR_LEN 16 @@ -194,8 +195,6 @@ struct ism_dev { struct pci_dev *pdev; struct smcd_dev *smcd; - void __iomem *ctl; - struct ism_sba *sba; dma_addr_t sba_dma_addr; DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS); @@ -209,6 +208,30 @@ struct ism_dev { #define ISM_CREATE_REQ(dmb, idx, sf, offset) \ ((dmb) | (idx) << 24 | (sf) << 23 | (offset)) +static inline void __ism_read_cmd(struct ism_dev *ism, void *data, + unsigned long offset, unsigned long len) +{ + struct zpci_dev *zdev = to_zpci(ism->pdev); + u64 req = ZPCI_CREATE_REQ(zdev->fh, 2, 8); + + while (len > 0) { + __zpci_load(data, req, offset); + offset += 8; + data += 8; + len -= 8; + } +} + +static inline void __ism_write_cmd(struct ism_dev *ism, void *data, + unsigned long offset, unsigned long len) +{ + struct zpci_dev *zdev = to_zpci(ism->pdev); + u64 req = ZPCI_CREATE_REQ(zdev->fh, 2, len); + + if (len) + __zpci_store_block(data, req, offset); +} + static inline int __ism_move(struct ism_dev *ism, u64 dmb_req, void *data, unsigned int size) { diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 3e132592c1fe3..4fc2056bd2272 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -38,19 +38,18 @@ static int ism_cmd(struct ism_dev *ism, void *cmd) struct ism_req_hdr *req = cmd; struct ism_resp_hdr *resp = cmd; - memcpy_toio(ism->ctl + sizeof(*req), req + 1, req->len - sizeof(*req)); - memcpy_toio(ism->ctl, req, sizeof(*req)); + __ism_write_cmd(ism, req + 1, sizeof(*req), req->len - sizeof(*req)); + __ism_write_cmd(ism, req, 0, sizeof(*req)); WRITE_ONCE(resp->ret, ISM_ERROR); - memcpy_fromio(resp, ism->ctl, sizeof(*resp)); + __ism_read_cmd(ism, resp, 0, sizeof(*resp)); if (resp->ret) { debug_text_event(ism_debug_info, 0, "cmd failure"); debug_event(ism_debug_info, 0, resp, sizeof(*resp)); goto out; } - memcpy_fromio(resp 
+ 1, ism->ctl + sizeof(*resp), - resp->len - sizeof(*resp)); + __ism_read_cmd(ism, resp + 1, sizeof(*resp), resp->len - sizeof(*resp)); out: return resp->ret; } @@ -512,13 +511,9 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto err_disable; - ism->ctl = pci_iomap(pdev, 2, 0); - if (!ism->ctl) - goto err_resource; - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (ret) - goto err_unmap; + goto err_resource; dma_set_seg_boundary(&pdev->dev, SZ_1M - 1); dma_set_max_seg_size(&pdev->dev, SZ_1M); @@ -527,7 +522,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) ism->smcd = smcd_alloc_dev(&pdev->dev, dev_name(&pdev->dev), &ism_ops, ISM_NR_DMBS); if (!ism->smcd) - goto err_unmap; + goto err_resource; ism->smcd->priv = ism; ret = ism_dev_init(ism); @@ -538,8 +533,6 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_free: smcd_free_dev(ism->smcd); -err_unmap: - pci_iounmap(pdev, ism->ctl); err_resource: pci_release_mem_regions(pdev); err_disable: @@ -568,7 +561,6 @@ static void ism_remove(struct pci_dev *pdev) ism_dev_exit(ism); smcd_free_dev(ism->smcd); - pci_iounmap(pdev, ism->ctl); pci_release_mem_regions(pdev); pci_disable_device(pdev); dev_set_drvdata(&pdev->dev, NULL); -- GitLab From 71ba41c9b1d91042960e9d92a5c8f52dc8531eda Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 14 Apr 2019 15:38:01 +0200 Subject: [PATCH 1666/1861] s390/pci: provide support for MIO instructions Provide support for PCI I/O instructions that work on mapped IO addresses. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/io.h | 17 ++-- arch/s390/include/asm/pci.h | 3 + arch/s390/include/asm/pci_clp.h | 14 +++- arch/s390/include/asm/pci_insn.h | 5 ++ arch/s390/pci/pci.c | 132 ++++++++++++++++++++++++++++--- arch/s390/pci/pci_clp.c | 25 ++++-- arch/s390/pci/pci_insn.c | 129 +++++++++++++++++++++++++++++- 7 files changed, 294 insertions(+), 31 deletions(-) diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index f34d729347e4c..ca421614722f6 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -30,14 +30,8 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define ioremap_wc ioremap_nocache #define ioremap_wt ioremap_nocache -static inline void __iomem *ioremap(unsigned long offset, unsigned long size) -{ - return (void __iomem *) offset; -} - -static inline void iounmap(volatile void __iomem *addr) -{ -} +void __iomem *ioremap(unsigned long offset, unsigned long size); +void iounmap(volatile void __iomem *addr); static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -57,14 +51,17 @@ static inline void ioport_unmap(void __iomem *p) * the corresponding device and create the mapping cookie. 
*/ #define pci_iomap pci_iomap +#define pci_iomap_range pci_iomap_range #define pci_iounmap pci_iounmap -#define pci_iomap_wc pci_iomap -#define pci_iomap_wc_range pci_iomap_range +#define pci_iomap_wc pci_iomap_wc +#define pci_iomap_wc_range pci_iomap_wc_range #define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count) #define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count) #define memset_io(dst, val, count) zpci_memset_io(dst, val, count) +#define mmiowb() zpci_barrier() + #define __raw_readb zpci_read_u8 #define __raw_readw zpci_read_u16 #define __raw_readl zpci_read_u32 diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 328013219aec5..305befd553264 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -86,6 +86,8 @@ enum zpci_state { struct zpci_bar_struct { struct resource *res; /* bus resource */ + void __iomem *mio_wb; + void __iomem *mio_wt; u32 val; /* bar start & 3 flag bits */ u16 map_idx; /* index into bar mapping array */ u8 size; /* order 2 exponent */ @@ -135,6 +137,7 @@ struct zpci_dev { struct iommu_device iommu_dev; /* IOMMU core handle */ char res_name[16]; + bool mio_capable; struct zpci_bar_struct bars[PCI_BAR_COUNT]; u64 start_dma; /* Start of available DMA addresses */ diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index d2d824a91e66f..3ec52a05d5006 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -43,6 +43,8 @@ struct clp_fh_list_entry { #define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ #define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ +#define CLP_SET_ENABLE_MIO 2 +#define CLP_SET_DISABLE_MIO 3 #define CLP_UTIL_STR_LEN 64 #define CLP_PFIP_NR_SEGMENTS 4 @@ -80,7 +82,8 @@ struct clp_req_query_pci { struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; u16 vfn; /* virtual fn number */ - u16 : 7; + u16 : 6; + u16 mio_addr_avail : 1; u16 util_str_avail : 1; /* utility string available? 
*/ u16 pfgid : 8; /* pci function group id */ u32 fid; /* pci function id */ @@ -96,6 +99,15 @@ struct clp_rsp_query_pci { u32 reserved[11]; u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ + u32 reserved2[16]; + u32 mio_valid : 6; + u32 : 26; + u32 : 32; + struct { + u64 wb; + u64 wt; + } addr[PCI_BAR_COUNT]; + u32 reserved3[6]; } __packed; /* Query PCI function group request */ diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 71f19604ec61d..61cf9531f68f2 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -2,6 +2,8 @@ #ifndef _ASM_S390_PCI_INSN_H #define _ASM_S390_PCI_INSN_H +#include + /* Load/Store status codes */ #define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 #define ZPCI_PCI_ST_FUNC_IN_ERR 8 @@ -122,6 +124,8 @@ union zpci_sic_iib { struct zpci_cdiib cdiib; }; +DECLARE_STATIC_KEY_FALSE(have_mio); + u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status); int zpci_refresh_trans(u64 fn, u64 addr, u64 range); int __zpci_load(u64 *data, u64 req, u64 offset); @@ -129,6 +133,7 @@ int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len); int __zpci_store(u64 data, u64 req, u64 offset); int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len); int __zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_barrier(void); int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib); static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 89d15a7f2e9ae..dff8f4526c8db 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,8 @@ static unsigned long *zpci_iomap_bitmap; struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); +DEFINE_STATIC_KEY_FALSE(have_mio); + static struct kmem_cache *zdev_fmb_cache; struct zpci_dev *get_zdev_by_fid(u32 fid) @@ -223,18 +226,48 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) zpci_memcpy_toio(to, from, count); } +void __iomem *ioremap(unsigned long ioaddr, unsigned long size) +{ + struct vm_struct *area; + unsigned long offset; + + if (!size) + return NULL; + + if (!static_branch_unlikely(&have_mio)) + return (void __iomem *) ioaddr; + + offset = ioaddr & ~PAGE_MASK; + ioaddr &= PAGE_MASK; + size = PAGE_ALIGN(size + offset); + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + + if (ioremap_page_range((unsigned long) area->addr, + (unsigned long) area->addr + size, + ioaddr, PAGE_KERNEL)) { + vunmap(area->addr); + return NULL; + } + return (void __iomem *) ((unsigned long) area->addr + offset); +} +EXPORT_SYMBOL(ioremap); + +void iounmap(volatile void __iomem *addr) +{ + if (static_branch_likely(&have_mio)) + vunmap((__force void *) ((unsigned long) addr & PAGE_MASK)); +} +EXPORT_SYMBOL(iounmap); + /* Create a virtual mapping cookie for a PCI BAR */ -void __iomem *pci_iomap_range(struct pci_dev *pdev, - int bar, - unsigned long offset, - unsigned long max) +static void __iomem *pci_iomap_range_fh(struct pci_dev *pdev, int bar, + unsigned long offset, unsigned long max) { struct zpci_dev *zdev = to_zpci(pdev); int idx; - if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT) - return NULL; - idx = zdev->bars[bar].map_idx; spin_lock(&zpci_iomap_lock); /* Detect overrun */ @@ -245,6 +278,30 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev, return (void __iomem *) ZPCI_ADDR(idx) + offset; } 
+ +static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar, + unsigned long offset, + unsigned long max) +{ + unsigned long barsize = pci_resource_len(pdev, bar); + struct zpci_dev *zdev = to_zpci(pdev); + void __iomem *iova; + + iova = ioremap((unsigned long) zdev->bars[bar].mio_wt, barsize); + return iova ? iova + offset : iova; +} + +void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar, + unsigned long offset, unsigned long max) +{ + if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT) + return NULL; + + if (static_branch_likely(&have_mio)) + return pci_iomap_range_mio(pdev, bar, offset, max); + else + return pci_iomap_range_fh(pdev, bar, offset, max); +} EXPORT_SYMBOL(pci_iomap_range); void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) @@ -253,7 +310,37 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) } EXPORT_SYMBOL(pci_iomap); -void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) +static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar, + unsigned long offset, unsigned long max) +{ + unsigned long barsize = pci_resource_len(pdev, bar); + struct zpci_dev *zdev = to_zpci(pdev); + void __iomem *iova; + + iova = ioremap((unsigned long) zdev->bars[bar].mio_wb, barsize); + return iova ? iova + offset : iova; +} + +void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar, + unsigned long offset, unsigned long max) +{ + if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT) + return NULL; + + if (static_branch_likely(&have_mio)) + return pci_iomap_wc_range_mio(pdev, bar, offset, max); + else + return pci_iomap_range_fh(pdev, bar, offset, max); +} +EXPORT_SYMBOL(pci_iomap_wc_range); + +void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + return pci_iomap_wc_range(dev, bar, 0, maxlen); +} +EXPORT_SYMBOL(pci_iomap_wc); + +static void pci_iounmap_fh(struct pci_dev *pdev, void __iomem *addr) { unsigned int idx = ZPCI_IDX(addr); @@ -266,6 +353,19 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) } spin_unlock(&zpci_iomap_lock); } + +static void pci_iounmap_mio(struct pci_dev *pdev, void __iomem *addr) +{ + iounmap(addr); +} + +void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) +{ + if (static_branch_likely(&have_mio)) + pci_iounmap_mio(pdev, addr); + else + pci_iounmap_fh(pdev, addr); +} EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, @@ -312,6 +412,7 @@ static struct resource iov_res = { static void zpci_map_resources(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); resource_size_t len; int i; @@ -319,8 +420,13 @@ static void zpci_map_resources(struct pci_dev *pdev) len = pci_resource_len(pdev, i); if (!len) continue; - pdev->resource[i].start = - (resource_size_t __force) pci_iomap(pdev, i, 0); + + if (static_branch_likely(&have_mio)) + pdev->resource[i].start = + (resource_size_t __force) zdev->bars[i].mio_wb; + else + pdev->resource[i].start = + (resource_size_t __force) pci_iomap(pdev, i, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; } @@ -341,6 +447,9 @@ static void zpci_unmap_resources(struct pci_dev *pdev) resource_size_t len; int i; + if (static_branch_likely(&have_mio)) + return; + for (i = 0; i < PCI_BAR_COUNT; i++) { len = pci_resource_len(pdev, i); if (!len) @@ -772,6 +881,9 @@ static int __init pci_base_init(void) if (!test_facility(69) || !test_facility(71)) return 0; + if (test_facility(153)) + static_branch_enable(&have_mio); + rc = 
zpci_debug_init(); if (rc) goto out; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index eeb7450db18c0..3a36b07a55713 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -163,7 +163,14 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, memcpy(zdev->util_str, response->util_str, sizeof(zdev->util_str)); } + zdev->mio_capable = response->mio_addr_avail; + for (i = 0; i < PCI_BAR_COUNT; i++) { + if (!(response->mio_valid & (1 << (PCI_BAR_COUNT - i - 1)))) + continue; + zdev->bars[i].mio_wb = (void __iomem *) response->addr[i].wb; + zdev->bars[i].mio_wt = (void __iomem *) response->addr[i].wt; + } return 0; } @@ -279,11 +286,18 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) int rc; rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); - if (!rc) - /* Success -> store enabled handle in zdev */ - zdev->fh = fh; + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + if (rc) + goto out; - zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); + zdev->fh = fh; + if (zdev->mio_capable) { + rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); + zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + if (rc) + clp_disable_fh(zdev); + } +out: return rc; } @@ -296,11 +310,10 @@ int clp_disable_fh(struct zpci_dev *zdev) return 0; rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); if (!rc) - /* Success -> store disabled handle in zdev */ zdev->fh = fh; - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 6dbd13bb7c1c9..02f9505c99a83 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -161,13 +162,50 @@ int __zpci_load(u64 *data, u64 req, u64 offset) } EXPORT_SYMBOL_GPL(__zpci_load); -int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len) +static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr, + unsigned long len) { struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); return __zpci_load(data, req, ZPCI_OFFSET(addr)); } + +static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status) +{ + register u64 addr asm("2") = ioaddr; + register u64 r3 asm("3") = len; + int cc = -ENXIO; + u64 __data; + + asm volatile ( + " .insn rre,0xb9d60000,%[data],%[ioaddr]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [data] "=d" (__data), "+d" (r3) + : [ioaddr] "d" (addr) + : "cc"); + *status = r3 >> 24 & 0xff; + *data = __data; + return cc; +} + +int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len) +{ + u8 status; + int cc; + + if (!static_branch_unlikely(&have_mio)) + return zpci_load_fh(data, addr, len); + + cc = __pcilg_mio(data, (__force u64) addr, len, &status); + if (cc) + zpci_err_insn(cc, status, 0, (__force u64) addr); + + return (cc > 0) ? 
-EIO : cc; +} EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ @@ -208,13 +246,48 @@ int __zpci_store(u64 data, u64 req, u64 offset) } EXPORT_SYMBOL_GPL(__zpci_store); -int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len) +static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data, + unsigned long len) { struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); return __zpci_store(data, req, ZPCI_OFFSET(addr)); } + +static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status) +{ + register u64 addr asm("2") = ioaddr; + register u64 r3 asm("3") = len; + int cc = -ENXIO; + + asm volatile ( + " .insn rre,0xb9d40000,%[data],%[ioaddr]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), "+d" (r3) + : [data] "d" (data), [ioaddr] "d" (addr) + : "cc"); + *status = r3 >> 24 & 0xff; + return cc; +} + +int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len) +{ + u8 status; + int cc; + + if (!static_branch_unlikely(&have_mio)) + return zpci_store_fh(addr, data, len); + + cc = __pcistg_mio(data, (__force u64) addr, len, &status); + if (cc) + zpci_err_insn(cc, status, 0, (__force u64) addr); + + return (cc > 0) ? -EIO : cc; +} EXPORT_SYMBOL_GPL(zpci_store); /* PCI Store Block */ @@ -253,8 +326,8 @@ int __zpci_store_block(const u64 *data, u64 req, u64 offset) } EXPORT_SYMBOL_GPL(__zpci_store_block); -int zpci_write_block(volatile void __iomem *dst, - const void *src, unsigned long len) +static inline int zpci_write_block_fh(volatile void __iomem *dst, + const void *src, unsigned long len) { struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)]; u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); @@ -262,4 +335,52 @@ int zpci_write_block(volatile void __iomem *dst, return __zpci_store_block(src, req, offset); } + +static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [len] "+d" (len) + : [ioaddr] "d" (ioaddr), [data] "Q" (*data) + : "cc"); + *status = len >> 24 & 0xff; + return cc; +} + +int zpci_write_block(volatile void __iomem *dst, + const void *src, unsigned long len) +{ + u8 status; + int cc; + + if (!static_branch_unlikely(&have_mio)) + return zpci_write_block_fh(dst, src, len); + + cc = __pcistb_mio(src, (__force u64) dst, len, &status); + if (cc) + zpci_err_insn(cc, status, 0, (__force u64) dst); + + return (cc > 0) ? -EIO : cc; +} EXPORT_SYMBOL_GPL(zpci_write_block); + +static inline void __pciwb_mio(void) +{ + unsigned long unused = 0; + + asm volatile (".insn rre,0xb9d50000,%[op],%[op]\n" + : [op] "+d" (unused)); +} + +void zpci_barrier(void) +{ + if (static_branch_likely(&have_mio)) + __pciwb_mio(); +} +EXPORT_SYMBOL_GPL(zpci_barrier); -- GitLab From 56271303808fb86375ec33183d56c21fdeb836ea Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 18 Apr 2019 21:39:06 +0200 Subject: [PATCH 1667/1861] s390/pci: add parameter to disable usage of MIO instructions Allow users to disable usage of MIO instructions by specifying pci=nomio at the kernel command line. 
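For example, booting with

    pci=nomio

on a machine that has facility 153 installed keeps PCI I/O on the existing function-handle based paths (zpci_load()/zpci_store() through the iomap tables), exactly as if the facility were absent: as the hunk below shows, the have_mio static branch is only enabled when the facility test succeeds and this parameter was not given.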
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- Documentation/admin-guide/kernel-parameters.txt | 1 + arch/s390/pci/pci.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index abc1b6e305c4a..055284c31e370 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3395,6 +3395,7 @@ this removes isolation between devices and may put more devices in an IOMMU group. force_floating [S390] Force usage of floating interrupts. + nomio [S390] Do not use MIO instructions. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index dff8f4526c8db..0ebb7c405a250 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -850,6 +850,7 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe __initdata = 1; +static unsigned int s390_pci_no_mio __initdata; unsigned int s390_pci_force_floating __initdata; static unsigned int s390_pci_initialized; @@ -859,6 +860,10 @@ char * __init pcibios_setup(char *str) s390_pci_probe = 0; return NULL; } + if (!strcmp(str, "nomio")) { + s390_pci_no_mio = 1; + return NULL; + } if (!strcmp(str, "force_floating")) { s390_pci_force_floating = 1; return NULL; @@ -881,7 +886,7 @@ static int __init pci_base_init(void) if (!test_facility(69) || !test_facility(71)) return 0; - if (test_facility(153)) + if (test_facility(153) && !s390_pci_no_mio) static_branch_enable(&have_mio); rc = zpci_debug_init(); -- GitLab From 833b441ec0f6f3c57cc2106ef628bb19d8fb0ee2 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Apr 2019 13:14:28 +0200 Subject: [PATCH 1668/1861] s390: enable processes for mio instructions Allow for userspace to use PCI MIO instructions. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_insn.h | 10 ++++++++++ arch/s390/kernel/early.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index 61cf9531f68f2..ff81ed19c5065 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -143,4 +143,14 @@ static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc) return __zpci_set_irq_ctrl(ctl, isc, &iib); } +#ifdef CONFIG_PCI +static inline void enable_mio_ctl(void) +{ + if (static_branch_likely(&have_mio)) + __ctl_set_bit(2, 5); +} +#else /* CONFIG_PCI */ +static inline void enable_mio_ctl(void) {} +#endif /* CONFIG_PCI */ + #endif diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c196abda36c77..ab09ada0e930d 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "entry.h" /* @@ -235,6 +236,7 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } + enable_mio_ctl(); } static inline void save_vector_registers(void) -- GitLab From 805bc0bc238f7209fca5e39c152b0d3c12046ac9 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sun, 3 Feb 2019 21:35:45 +0100 Subject: [PATCH 1669/1861] s390/kernel: build a relocatable kernel This patch adds support for building a relocatable kernel with -fPIE. The kernel will be relocated to 0 early in the boot process. 
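In outline, each 64-bit RELA entry is applied as follows (a simplified sketch of the handle_relocs() routine added in this patch; offset is the displacement between load and link address, and error handling is omitted):

    /* Sketch: apply one Elf64_Rela entry for a kernel loaded at link + offset */
    Elf64_Addr loc = rela->r_offset + offset;   /* where to patch */
    Elf64_Addr val = rela->r_addend + offset;   /* value to store there */
    int r_sym = ELF64_R_SYM(rela->r_info);
    if (r_sym)                                  /* symbol-relative entry */
        val += dynsym[r_sym].st_value;
    arch_kexec_do_relocs(ELF64_R_TYPE(rela->r_info), (void *)loc, val, 0);

Because the decompressor first moves the image back to its link address, the call site below passes handle_relocs(0), i.e. the relocations are resolved for a displacement of zero.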
Signed-off-by: Gerald Schaefer Reviewed-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 13 ++++++ arch/s390/Makefile | 4 ++ arch/s390/boot/Makefile | 1 + arch/s390/boot/compressed/decompressor.h | 3 ++ arch/s390/boot/machine_kexec_reloc.c | 2 + arch/s390/boot/startup.c | 27 ++++++++++++ arch/s390/include/asm/kexec.h | 2 + arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/machine_kexec_file.c | 44 ++------------------ arch/s390/kernel/machine_kexec_reloc.c | 53 ++++++++++++++++++++++++ arch/s390/kernel/vmlinux.lds.S | 15 +++++++ 11 files changed, 124 insertions(+), 42 deletions(-) create mode 100644 arch/s390/boot/machine_kexec_reloc.c create mode 100644 arch/s390/kernel/machine_kexec_reloc.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 21e851b0a989b..4c99e4f5f3661 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -624,6 +624,19 @@ config EXPOLINE_FULL endchoice +config RELOCATABLE + bool "Build a relocatable kernel" + select MODULE_REL_CRCS if MODVERSIONS + default y + help + This builds a kernel image that retains relocation information + so it can be loaded at an arbitrary address. + The kernel is linked as a position-independent executable (PIE) + and contains dynamic relocations which are processed early in the + bootup process. + The relocations make the kernel image about 15% larger (compressed + 10%), but are discarded at runtime. + endmenu menu "Memory setup" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 9c079a506325b..54b8a12d64e86 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -16,6 +16,10 @@ KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 +ifeq ($(CONFIG_RELOCATABLE),y) +KBUILD_CFLAGS += -fPIE +LDFLAGS_vmlinux := -pie +endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index c1993c57300fb..4df43e83363a5 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -32,6 +32,7 @@ obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += ctype.o obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o +obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index 424cf524aac16..c15eb7114d836 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -19,6 +19,9 @@ struct vmlinux_info { unsigned long bootdata_size; unsigned long bootdata_preserved_off; unsigned long bootdata_preserved_size; + unsigned long dynsym_start; + unsigned long rela_dyn_start; + unsigned long rela_dyn_end; }; extern char _vmlinux_info[]; diff --git a/arch/s390/boot/machine_kexec_reloc.c b/arch/s390/boot/machine_kexec_reloc.c new file mode 100644 index 0000000000000..b7a5d0f72097e --- /dev/null +++ b/arch/s390/boot/machine_kexec_reloc.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/machine_kexec_reloc.c" diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 90898976a9410..b7d6a76cb5e97 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include 
#include +#include #include #include #include "compressed/decompressor.h" @@ -47,6 +49,29 @@ static void copy_bootdata(void) memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size); } +static void handle_relocs(unsigned long offset) +{ + Elf64_Rela *rela_start, *rela_end, *rela; + int r_type, r_sym, rc; + Elf64_Addr loc, val; + Elf64_Sym *dynsym; + + rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start; + rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end; + dynsym = (Elf64_Sym *) vmlinux.dynsym_start; + for (rela = rela_start; rela < rela_end; rela++) { + loc = rela->r_offset + offset; + val = rela->r_addend + offset; + r_sym = ELF64_R_SYM(rela->r_info); + if (r_sym) + val += dynsym[r_sym].st_value; + r_type = ELF64_R_TYPE(rela->r_info); + rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); + if (rc) + error("Unknown relocation type"); + } +} + void startup_kernel(void) { unsigned long safe_addr; @@ -67,5 +92,7 @@ void startup_kernel(void) memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); } copy_bootdata(); + if (IS_ENABLED(CONFIG_RELOCATABLE)) + handle_relocs(0); vmlinux.entry(); } diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 305d3465574f1..ea398a05f6432 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -71,6 +71,8 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len); void *kexec_file_add_components(struct kimage *image, int (*add_kernel)(struct kimage *image, struct s390_load_data *data)); +int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, + unsigned long addr); extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index d28acd7ba81e9..19425605a83dc 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -51,7 +51,7 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o -obj-y += nospec-branch.o ipl_vmparm.o +obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 48cab9600ed91..42c23a5c82291 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -290,7 +290,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *symtab) { Elf_Rela *relas; - int i; + int i, r_type; relas = (void *)pi->ehdr + relsec->sh_offset; @@ -324,46 +324,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; - switch (ELF64_R_TYPE(relas[i].r_info)) { - case R_390_8: /* Direct 8 bit. */ - *(u8 *)loc = val; - break; - case R_390_12: /* Direct 12 bit. */ - *(u16 *)loc &= 0xf000; - *(u16 *)loc |= val & 0xfff; - break; - case R_390_16: /* Direct 16 bit. */ - *(u16 *)loc = val; - break; - case R_390_20: /* Direct 20 bit. */ - *(u32 *)loc &= 0xf00000ff; - *(u32 *)loc |= (val & 0xfff) << 16; /* DL */ - *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */ - break; - case R_390_32: /* Direct 32 bit. */ - *(u32 *)loc = val; - break; - case R_390_64: /* Direct 64 bit. */ - *(u64 *)loc = val; - break; - case R_390_PC16: /* PC relative 16 bit. 
*/ - *(u16 *)loc = (val - addr); - break; - case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ - *(u16 *)loc = (val - addr) >> 1; - break; - case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ - *(u32 *)loc = (val - addr) >> 1; - break; - case R_390_PC32: /* PC relative 32 bit. */ - *(u32 *)loc = (val - addr); - break; - case R_390_PC64: /* PC relative 64 bit. */ - *(u64 *)loc = (val - addr); - break; - default: - break; - } + r_type = ELF64_R_TYPE(relas[i].r_info); + arch_kexec_do_relocs(r_type, loc, val, addr); } return 0; } diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c new file mode 100644 index 0000000000000..1dded39239f86 --- /dev/null +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, + unsigned long addr) +{ + switch (r_type) { + case R_390_NONE: + break; + case R_390_8: /* Direct 8 bit. */ + *(u8 *)loc = val; + break; + case R_390_12: /* Direct 12 bit. */ + *(u16 *)loc &= 0xf000; + *(u16 *)loc |= val & 0xfff; + break; + case R_390_16: /* Direct 16 bit. */ + *(u16 *)loc = val; + break; + case R_390_20: /* Direct 20 bit. */ + *(u32 *)loc &= 0xf00000ff; + *(u32 *)loc |= (val & 0xfff) << 16; /* DL */ + *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */ + break; + case R_390_32: /* Direct 32 bit. */ + *(u32 *)loc = val; + break; + case R_390_64: /* Direct 64 bit. */ + *(u64 *)loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + *(u16 *)loc = (val - addr); + break; + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + *(u16 *)loc = (val - addr) >> 1; + break; + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + *(u32 *)loc = (val - addr) >> 1; + break; + case R_390_PC32: /* PC relative 32 bit. */ + *(u32 *)loc = (val - addr); + break; + case R_390_PC64: /* PC relative 64 bit. */ + *(u64 *)loc = (val - addr); + break; + case R_390_RELATIVE: + *(unsigned long *) loc = val; + break; + default: + return 1; + } + return 0; +} diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 6ef9c62bb01b6..49d55327de0bc 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -144,6 +144,18 @@ SECTIONS INIT_DATA_SECTION(0x100) PERCPU_SECTION(0x100) + + .dynsym ALIGN(8) : { + __dynsym_start = .; + *(.dynsym) + __dynsym_end = .; + } + .rela.dyn ALIGN(8) : { + __rela_dyn_start = .; + *(.rela*) + __rela_dyn_end = .; + } + . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ @@ -165,6 +177,9 @@ SECTIONS QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */ QUAD(__boot_data_preserved_end - __boot_data_preserved_start) /* bootdata_preserved_size */ + QUAD(__dynsym_start) /* dynsym_start */ + QUAD(__rela_dyn_start) /* rela_dyn_start */ + QUAD(__rela_dyn_end) /* rela_dyn_end */ } :NONE /* Debugging sections. */ -- GitLab From ff4a742dde3c4b80a91cdd754fed3bc576df28c9 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sun, 3 Feb 2019 21:36:13 +0100 Subject: [PATCH 1670/1861] s390/kernel: convert SYSCALL and PGM_CHECK handlers to .quad With a relocatable kernel that could reside at any place in memory, the storage size for the SYSCALL and PGM_CHECK handlers needs to be increased from .long to .quad. 
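Concretely, a table entry now occupies 8 bytes, so the index scaling in entry.S changes from a shift by 2 to a shift by 3, and the table loads change from lgf to lg. In C terms the dispatch is equivalent to the following (an illustrative rendering only; the function prototype is simplified):

    extern const unsigned long sys_call_table[];        /* .quad entries */
    typedef long (*syscall_fn)(long, long, long, long, long, long);

    syscall_fn fn = (syscall_fn) sys_call_table[nr];    /* offset = nr << 3 */
    long ret = fn(a0, a1, a2, a3, a4, a5);

With 32-bit .long entries the table could only encode handler addresses below 4 GB, which the comment removed from syscall.h relied on and which a relocatable kernel no longer guarantees.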
Signed-off-by: Gerald Schaefer Reviewed-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall.h | 9 ++------- arch/s390/kernel/entry.S | 18 +++++++++--------- arch/s390/kernel/pgm_check.S | 2 +- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 96f9a9151fde0..8f77bb4293f92 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -14,13 +14,8 @@ #include #include -/* - * The syscall table always contains 32 bit pointers since we know that the - * address of the function to be called is (way) below 4GB. So the "int" - * type here is what we want [need] for both 32 bit and 64 bit systems. - */ -extern const unsigned int sys_call_table[]; -extern const unsigned int sys_call_table_emu[]; +extern const unsigned long sys_call_table[]; +extern const unsigned long sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 583d65ef5007a..baa67e434b468 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -358,19 +358,19 @@ ENTRY(system_call) # load address of system call table lg %r10,__THREAD_sysc_table(%r13,%r12) llgh %r8,__PT_INT_CODE+2(%r11) - slag %r8,%r8,2 # shift and test for svc 0 + slag %r8,%r8,3 # shift and test for svc 0 jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl .Lsysc_nr_ok sth %r1,__PT_INT_CODE+2(%r11) - slag %r8,%r1,2 + slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stg %r2,__PT_ORIG_GPR2(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) - lgf %r9,0(%r8,%r10) # get system call add. + lg %r9,0(%r8,%r10) # get system call add. 
TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys BASR_EX %r14,%r9 # call sys_xxxx @@ -556,8 +556,8 @@ ENTRY(system_call) lghi %r0,NR_syscalls clgr %r0,%r2 jnh .Lsysc_tracenogo - sllg %r8,%r2,2 - lgf %r9,0(%r8,%r10) + sllg %r8,%r2,3 + lg %r9,0(%r8,%r10) .Lsysc_tracego: lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) @@ -665,9 +665,9 @@ ENTRY(pgm_check_handler) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) nill %r10,0x007f - sll %r10,2 + sll %r10,3 je .Lpgm_return - lgf %r9,0(%r10,%r1) # load address of handler routine + lg %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs BASR_EX %r14,%r9 # branch to interrupt-handler .Lpgm_return: @@ -1512,7 +1512,7 @@ cleanup_critical: .quad .Lsie_skip - .Lsie_entry #endif .section .rodata, "a" -#define SYSCALL(esame,emu) .long __s390x_ ## esame +#define SYSCALL(esame,emu) .quad __s390x_ ## esame .globl sys_call_table sys_call_table: #include "asm/syscall_table.h" @@ -1520,7 +1520,7 @@ sys_call_table: #ifdef CONFIG_COMPAT -#define SYSCALL(esame,emu) .long __s390_ ## emu +#define SYSCALL(esame,emu) .quad __s390_ ## emu .globl sys_call_table_emu sys_call_table_emu: #include "asm/syscall_table.h" diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 3e62aae34ea3f..59dee9d3bebf1 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -7,7 +7,7 @@ #include -#define PGM_CHECK(handler) .long handler +#define PGM_CHECK(handler) .quad handler #define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler) /* -- GitLab From fd3d2742d558d33560ec5dfee3001e561b5c0822 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sun, 3 Feb 2019 21:36:46 +0100 Subject: [PATCH 1671/1861] s390/kprobes: use static buffer for insn_page With a relocatable kernel that could reside at any place in memory, the current logic for allocating a kprobes insn_page does not work. The GFP_DMA allocated buffer might be more than 2 GB away from the kernel. Use a static buffer for the insn_page instead. 
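The replacement is deliberately simple: one statically linked, page-aligned buffer is claimed and released with xchg(). Because the buffer is part of the kernel image, it is always within the 2 GB range reachable by relative branches, and a single page suffices since the insn slot cache carves it into MAX_INSN_SIZE slots. A minimal sketch of the claim/release pairing (the helper names here are illustrative; the real functions are alloc_s390_insn_page() and free_s390_insn_page() in the diff below):

    static int insn_page_in_use;
    static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE);

    static void *claim_insn_page(void)
    {
        /* atomically claim the single page; NULL if it is already taken */
        return xchg(&insn_page_in_use, 1) ? NULL : insn_page;
    }

    static void release_insn_page(void)
    {
        xchg(&insn_page_in_use, 0);     /* hand the page back */
    }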
Signed-off-by: Gerald Schaefer Reviewed-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 7c0a095e9c5f6..263426dcf97dd 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -27,29 +27,30 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = { }; -DEFINE_INSN_CACHE_OPS(dmainsn); +DEFINE_INSN_CACHE_OPS(s390_insn); -static void *alloc_dmainsn_page(void) -{ - void *page; +static int insn_page_in_use; +static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE); - page = (void *) __get_free_page(GFP_KERNEL | GFP_DMA); - if (page) - set_memory_x((unsigned long) page, 1); - return page; +static void *alloc_s390_insn_page(void) +{ + if (xchg(&insn_page_in_use, 1) == 1) + return NULL; + set_memory_x((unsigned long) &insn_page, 1); + return &insn_page; } -static void free_dmainsn_page(void *page) +static void free_s390_insn_page(void *page) { set_memory_nx((unsigned long) page, 1); - free_page((unsigned long)page); + xchg(&insn_page_in_use, 0); } -struct kprobe_insn_cache kprobe_dmainsn_slots = { - .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex), - .alloc = alloc_dmainsn_page, - .free = free_dmainsn_page, - .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages), +struct kprobe_insn_cache kprobe_s390_insn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex), + .alloc = alloc_s390_insn_page, + .free = free_s390_insn_page, + .pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages), .insn_size = MAX_INSN_SIZE, }; @@ -102,7 +103,7 @@ static int s390_get_insn_slot(struct kprobe *p) */ p->ainsn.insn = NULL; if (is_kernel_addr(p->addr)) - p->ainsn.insn = get_dmainsn_slot(); + p->ainsn.insn = get_s390_insn_slot(); else if (is_module_addr(p->addr)) p->ainsn.insn = get_insn_slot(); return p->ainsn.insn ? 0 : -ENOMEM; @@ -114,7 +115,7 @@ static void s390_free_insn_slot(struct kprobe *p) if (!p->ainsn.insn) return; if (is_kernel_addr(p->addr)) - free_dmainsn_slot(p->ainsn.insn, 0); + free_s390_insn_slot(p->ainsn.insn, 0); else free_insn_slot(p->ainsn.insn, 0); p->ainsn.insn = NULL; -- GitLab From 087c4d7423989b110c3312592db05acc009a5d58 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 8 Apr 2019 12:49:58 +0200 Subject: [PATCH 1672/1861] s390/sclp: do not use static sccbs The sccbs for init/read/sdias/early have to be located below 2 GB, and they are currently defined as a static buffer. With a relocatable kernel that could reside at any place in memory, this will no longer guarantee the location below 2 GB, so use a dynamic GFP_DMA allocation instead. The sclp_early_sccb buffer needs special handling, as it can be used very early, and by both the decompressor and also the decompressed kernel. Therefore, a fixed 4 KB buffer is introduced at 0x11000, the former PARMAREA_END. The new PARMAREA_END is now 0x12000, and it is renamed to HEAD_END, as it is rather the end of head.S and not the end of the parmarea. 
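The resulting fixed low-memory layout, with the values from the setup.h hunk below:

    #define PARMAREA          0x10400
    #define EARLY_SCCB_OFFSET 0x11000  /* 4 KB sclp_early_sccb, shared by the
                                          decompressor and the kernel */
    #define HEAD_END          0x12000  /* former PARMAREA_END */

The remaining sccbs can safely switch to run-time allocation because their users initialize long after the page allocator is up, and GFP_DMA on s390 returns memory below 2 GB.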
Signed-off-by: Gerald Schaefer Reviewed-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/boot/head.S | 5 +- arch/s390/include/asm/setup.h | 5 +- arch/s390/kernel/machine_kexec_file.c | 4 +- arch/s390/kernel/setup.c | 2 +- drivers/s390/char/sclp.c | 14 +++-- drivers/s390/char/sclp.h | 2 +- drivers/s390/char/sclp_early.c | 2 +- drivers/s390/char/sclp_early_core.c | 20 ++++---- drivers/s390/char/sclp_sdias.c | 74 +++++++++++++++------------ 9 files changed, 71 insertions(+), 57 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index d585c4dbdac7c..c6b4b4c5dd585 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -336,4 +336,7 @@ ENTRY(startup_kdump) .byte "root=/dev/ram0 ro" .byte 0 - .org 0x11000 + .org EARLY_SCCB_OFFSET + .fill 4096 + + .org HEAD_END diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b603cc09c895c..d202756d62912 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -12,7 +12,10 @@ #define EP_OFFSET 0x10008 #define EP_STRING "S390EP" #define PARMAREA 0x10400 -#define PARMAREA_END 0x11000 +#define EARLY_SCCB_OFFSET 0x11000 +#define HEAD_END 0x12000 + +#define EARLY_SCCB_SIZE PAGE_SIZE /* * Machine features detected in early.c diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 42c23a5c82291..fbdd3ea736674 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -337,10 +337,8 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, * load memory in head.S will be accessed, e.g. to register the next * command line. If the next kernel were smaller the current kernel * will panic at load. - * - * 0x11000 = sizeof(head.S) */ - if (buf_len < 0x11000) + if (buf_len < HEAD_END) return -ENOEXEC; return kexec_image_probe_default(image, buf, buf_len); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ffc87520aca96..94efb1eb34b60 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -829,7 +829,7 @@ static void __init reserve_kernel(void) { unsigned long start_pfn = PFN_UP(__pa(_end)); - memblock_reserve(0, PARMAREA_END); + memblock_reserve(0, HEAD_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); } diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index e9aa71cdfc44e..d2ab3f07c008c 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -45,8 +45,8 @@ static struct list_head sclp_req_queue; /* Data for read and and init requests. 
*/ static struct sclp_req sclp_read_req; static struct sclp_req sclp_init_req; -static char sclp_read_sccb[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); -static char sclp_init_sccb[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +static void *sclp_read_sccb; +static struct init_sccb *sclp_init_sccb; /* Suspend request */ static DECLARE_COMPLETION(sclp_request_queue_flushed); @@ -753,9 +753,8 @@ EXPORT_SYMBOL(sclp_remove_processed); static inline void __sclp_make_init_req(sccb_mask_t receive_mask, sccb_mask_t send_mask) { - struct init_sccb *sccb; + struct init_sccb *sccb = sclp_init_sccb; - sccb = (struct init_sccb *) sclp_init_sccb; clear_page(sccb); memset(&sclp_init_req, 0, sizeof(struct sclp_req)); sclp_init_req.command = SCLP_CMDW_WRITE_EVENT_MASK; @@ -782,7 +781,7 @@ static int sclp_init_mask(int calculate) { unsigned long flags; - struct init_sccb *sccb = (struct init_sccb *) sclp_init_sccb; + struct init_sccb *sccb = sclp_init_sccb; sccb_mask_t receive_mask; sccb_mask_t send_mask; int retry; @@ -1175,6 +1174,9 @@ sclp_init(void) if (sclp_init_state != sclp_init_state_uninitialized) goto fail_unlock; sclp_init_state = sclp_init_state_initializing; + sclp_read_sccb = (void *) __get_free_page(GFP_ATOMIC | GFP_DMA); + sclp_init_sccb = (void *) __get_free_page(GFP_ATOMIC | GFP_DMA); + BUG_ON(!sclp_read_sccb || !sclp_init_sccb); /* Set up variables */ INIT_LIST_HEAD(&sclp_req_queue); INIT_LIST_HEAD(&sclp_reg_list); @@ -1207,6 +1209,8 @@ fail_unregister_reboot_notifier: unregister_reboot_notifier(&sclp_reboot_notifier); fail_init_state_uninitialized: sclp_init_state = sclp_init_state_uninitialized; + free_page((unsigned long) sclp_read_sccb); + free_page((unsigned long) sclp_init_sccb); fail_unlock: spin_unlock_irqrestore(&sclp_lock, flags); return rc; diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 043f32ba37491..28b433960831f 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -321,7 +321,7 @@ extern int sclp_console_drop; extern unsigned long sclp_console_full; extern bool sclp_mask_compat_mode; -extern char sclp_early_sccb[PAGE_SIZE]; +extern char *sclp_early_sccb; void sclp_early_wait_irq(void); int sclp_early_cmd(sclp_cmdw_t cmd, void *sccb); diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index fdad2a980129d..6c90aa725f237 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -147,7 +147,7 @@ static void __init sclp_early_console_detect(struct init_sccb *sccb) void __init sclp_early_detect(void) { - void *sccb = &sclp_early_sccb; + void *sccb = sclp_early_sccb; sclp_early_facilities_detect(sccb); sclp_early_init_core_info(sccb); diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 387c114ded3f7..7737470f8498f 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -16,7 +16,7 @@ static struct read_info_sccb __bootdata(sclp_info_sccb); static int __bootdata(sclp_info_sccb_valid); -char sclp_early_sccb[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); +char *sclp_early_sccb = (char *) EARLY_SCCB_OFFSET; int sclp_init_state __section(.data) = sclp_init_state_uninitialized; /* * Used to keep track of the size of the event masks. 
Qemu until version 2.11 @@ -91,8 +91,8 @@ static void sclp_early_print_lm(const char *str, unsigned int len) struct mto *mto; struct go *go; - sccb = (struct write_sccb *) &sclp_early_sccb; - end = (unsigned char *) sccb + sizeof(sclp_early_sccb) - 1; + sccb = (struct write_sccb *) sclp_early_sccb; + end = (unsigned char *) sccb + EARLY_SCCB_SIZE - 1; memset(sccb, 0, sizeof(*sccb)); ptr = (unsigned char *) &sccb->msg.mdb.mto; offset = 0; @@ -139,9 +139,9 @@ static void sclp_early_print_vt220(const char *str, unsigned int len) { struct vt220_sccb *sccb; - sccb = (struct vt220_sccb *) &sclp_early_sccb; - if (sizeof(*sccb) + len >= sizeof(sclp_early_sccb)) - len = sizeof(sclp_early_sccb) - sizeof(*sccb); + sccb = (struct vt220_sccb *) sclp_early_sccb; + if (sizeof(*sccb) + len >= EARLY_SCCB_SIZE) + len = EARLY_SCCB_SIZE - sizeof(*sccb); memset(sccb, 0, sizeof(*sccb)); memcpy(&sccb->msg.data, str, len); sccb->header.length = sizeof(*sccb) + len; @@ -199,7 +199,7 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220) BUILD_BUG_ON(sizeof(struct init_sccb) > PAGE_SIZE); *have_linemode = *have_vt220 = 0; - sccb = (struct init_sccb *) &sclp_early_sccb; + sccb = (struct init_sccb *) sclp_early_sccb; receive_mask = disable ? 0 : EVTYP_OPCMD_MASK; send_mask = disable ? 0 : EVTYP_VT220MSG_MASK | EVTYP_MSG_MASK; rc = sclp_early_set_event_mask(sccb, receive_mask, send_mask); @@ -304,7 +304,7 @@ int __init sclp_early_get_hsa_size(unsigned long *hsa_size) void __weak __init add_mem_detect_block(u64 start, u64 end) {} int __init sclp_early_read_storage_info(void) { - struct read_storage_sccb *sccb = (struct read_storage_sccb *)&sclp_early_sccb; + struct read_storage_sccb *sccb = (struct read_storage_sccb *)sclp_early_sccb; int rc, id, max_id = 0; unsigned long rn, rzm; sclp_cmdw_t command; @@ -320,8 +320,8 @@ int __init sclp_early_read_storage_info(void) rzm <<= 20; for (id = 0; id <= max_id; id++) { - memset(sclp_early_sccb, 0, sizeof(sclp_early_sccb)); - sccb->header.length = sizeof(sclp_early_sccb); + memset(sclp_early_sccb, 0, EARLY_SCCB_SIZE); + sccb->header.length = EARLY_SCCB_SIZE; command = SCLP_CMDW_READ_STORAGE_INFO | (id << 8); rc = sclp_early_cmd(command, sccb); if (rc) diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 8e0b69a2f11a5..13f97fd73acac 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -29,7 +29,7 @@ static struct sclp_register sclp_sdias_register = { .send_mask = EVTYP_SDIAS_MASK, }; -static struct sdias_sccb sccb __attribute__((aligned(4096))); +static struct sdias_sccb *sclp_sdias_sccb; static struct sdias_evbuf sdias_evbuf; static DECLARE_COMPLETION(evbuf_accepted); @@ -58,6 +58,7 @@ static void sdias_callback(struct sclp_req *request, void *data) static int sdias_sclp_send(struct sclp_req *req) { + struct sdias_sccb *sccb = sclp_sdias_sccb; int retries; int rc; @@ -78,16 +79,16 @@ static int sdias_sclp_send(struct sclp_req *req) continue; } /* if not accepted, retry */ - if (!(sccb.evbuf.hdr.flags & 0x80)) { + if (!(sccb->evbuf.hdr.flags & 0x80)) { TRACE("sclp request failed: flags=%x\n", - sccb.evbuf.hdr.flags); + sccb->evbuf.hdr.flags); continue; } /* * for the sync interface the response is in the initial sccb */ if (!sclp_sdias_register.receiver_fn) { - memcpy(&sdias_evbuf, &sccb.evbuf, sizeof(sdias_evbuf)); + memcpy(&sdias_evbuf, &sccb->evbuf, sizeof(sdias_evbuf)); TRACE("sync request done\n"); return 0; } @@ -104,23 +105,24 @@ static int sdias_sclp_send(struct sclp_req *req) */ int 
sclp_sdias_blk_count(void) { + struct sdias_sccb *sccb = sclp_sdias_sccb; struct sclp_req request; int rc; mutex_lock(&sdias_mutex); - memset(&sccb, 0, sizeof(sccb)); + memset(sccb, 0, sizeof(*sccb)); memset(&request, 0, sizeof(request)); - sccb.hdr.length = sizeof(sccb); - sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf); - sccb.evbuf.hdr.type = EVTYP_SDIAS; - sccb.evbuf.event_qual = SDIAS_EQ_SIZE; - sccb.evbuf.data_id = SDIAS_DI_FCP_DUMP; - sccb.evbuf.event_id = 4712; - sccb.evbuf.dbs = 1; + sccb->hdr.length = sizeof(*sccb); + sccb->evbuf.hdr.length = sizeof(struct sdias_evbuf); + sccb->evbuf.hdr.type = EVTYP_SDIAS; + sccb->evbuf.event_qual = SDIAS_EQ_SIZE; + sccb->evbuf.data_id = SDIAS_DI_FCP_DUMP; + sccb->evbuf.event_id = 4712; + sccb->evbuf.dbs = 1; - request.sccb = &sccb; + request.sccb = sccb; request.command = SCLP_CMDW_WRITE_EVENT_DATA; request.status = SCLP_REQ_FILLED; request.callback = sdias_callback; @@ -130,8 +132,8 @@ int sclp_sdias_blk_count(void) pr_err("sclp_send failed for get_nr_blocks\n"); goto out; } - if (sccb.hdr.response_code != 0x0020) { - TRACE("send failed: %x\n", sccb.hdr.response_code); + if (sccb->hdr.response_code != 0x0020) { + TRACE("send failed: %x\n", sccb->hdr.response_code); rc = -EIO; goto out; } @@ -163,30 +165,31 @@ out: */ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) { + struct sdias_sccb *sccb = sclp_sdias_sccb; struct sclp_req request; int rc; mutex_lock(&sdias_mutex); - memset(&sccb, 0, sizeof(sccb)); + memset(sccb, 0, sizeof(*sccb)); memset(&request, 0, sizeof(request)); - sccb.hdr.length = sizeof(sccb); - sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf); - sccb.evbuf.hdr.type = EVTYP_SDIAS; - sccb.evbuf.hdr.flags = 0; - sccb.evbuf.event_qual = SDIAS_EQ_STORE_DATA; - sccb.evbuf.data_id = SDIAS_DI_FCP_DUMP; - sccb.evbuf.event_id = 4712; - sccb.evbuf.asa_size = SDIAS_ASA_SIZE_64; - sccb.evbuf.event_status = 0; - sccb.evbuf.blk_cnt = nr_blks; - sccb.evbuf.asa = (unsigned long)dest; - sccb.evbuf.fbn = start_blk; - sccb.evbuf.lbn = 0; - sccb.evbuf.dbs = 1; - - request.sccb = &sccb; + sccb->hdr.length = sizeof(*sccb); + sccb->evbuf.hdr.length = sizeof(struct sdias_evbuf); + sccb->evbuf.hdr.type = EVTYP_SDIAS; + sccb->evbuf.hdr.flags = 0; + sccb->evbuf.event_qual = SDIAS_EQ_STORE_DATA; + sccb->evbuf.data_id = SDIAS_DI_FCP_DUMP; + sccb->evbuf.event_id = 4712; + sccb->evbuf.asa_size = SDIAS_ASA_SIZE_64; + sccb->evbuf.event_status = 0; + sccb->evbuf.blk_cnt = nr_blks; + sccb->evbuf.asa = (unsigned long)dest; + sccb->evbuf.fbn = start_blk; + sccb->evbuf.lbn = 0; + sccb->evbuf.dbs = 1; + + request.sccb = sccb; request.command = SCLP_CMDW_WRITE_EVENT_DATA; request.status = SCLP_REQ_FILLED; request.callback = sdias_callback; @@ -196,8 +199,8 @@ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) pr_err("sclp_send failed: %x\n", rc); goto out; } - if (sccb.hdr.response_code != 0x0020) { - TRACE("copy failed: %x\n", sccb.hdr.response_code); + if (sccb->hdr.response_code != 0x0020) { + TRACE("copy failed: %x\n", sccb->hdr.response_code); rc = -EIO; goto out; } @@ -256,6 +259,8 @@ int __init sclp_sdias_init(void) { if (ipl_info.type != IPL_TYPE_FCP_DUMP) return 0; + sclp_sdias_sccb = (void *) __get_free_page(GFP_KERNEL | GFP_DMA); + BUG_ON(!sclp_sdias_sccb); sdias_dbf = debug_register("dump_sdias", 4, 1, 4 * sizeof(long)); debug_register_view(sdias_dbf, &debug_sprintf_view); debug_set_level(sdias_dbf, 6); @@ -264,6 +269,7 @@ int __init sclp_sdias_init(void) if (sclp_sdias_init_async() == 0) goto out; TRACE("init failed\n"); + 
free_page((unsigned long) sclp_sdias_sccb); return -ENODEV; out: TRACE("init done\n"); -- GitLab From a80313ff91abda67641dc33bed97f6bcc5e9f6a4 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sun, 3 Feb 2019 21:37:20 +0100 Subject: [PATCH 1673/1861] s390/kernel: introduce .dma sections With a relocatable kernel that could reside at any place in memory, code and data that has to stay below 2 GB needs special handling. This patch introduces .dma sections for such text, data and ex_table. The sections will be part of the decompressor kernel, so they will not be relocated and stay below 2 GB. Their location is passed over to the decompressed / relocated kernel via the .boot.preserved.data section. The duald and aste for control register setup also need to stay below 2 GB, so move the setup code from arch/s390/kernel/head64.S to arch/s390/boot/head.S. The duct and linkage_stack could reside above 2 GB, but their content has to be preserved for the decompressed kernel, so they are also moved into the .dma section. The start and end addresses of the .dma sections are added to vmcoreinfo, for crash support, to help debugging in case the kernel crashed there. Signed-off-by: Gerald Schaefer Reviewed-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/compressed/vmlinux.lds.S | 21 +++ arch/s390/boot/head.S | 32 ++++- arch/s390/boot/startup.c | 39 ++++++ arch/s390/boot/text_dma.S | 167 ++++++++++++++++++++++++ arch/s390/hypfs/hypfs_diag0c.c | 18 +-- arch/s390/include/asm/diag.h | 13 ++ arch/s390/include/asm/extable.h | 5 + arch/s390/include/asm/ipl.h | 1 - arch/s390/include/asm/linkage.h | 7 + arch/s390/include/asm/sections.h | 3 + arch/s390/kernel/base.S | 68 ---------- arch/s390/kernel/diag.c | 67 ++-------- arch/s390/kernel/early.c | 2 +- arch/s390/kernel/head64.S | 26 ---- arch/s390/kernel/ipl.c | 2 +- arch/s390/kernel/kprobes.c | 2 +- arch/s390/kernel/machine_kexec.c | 2 + arch/s390/kernel/setup.c | 9 ++ arch/s390/kernel/smp.c | 2 +- arch/s390/kernel/swsusp.S | 15 +-- arch/s390/kernel/traps.c | 3 +- arch/s390/mm/fault.c | 14 +- arch/s390/mm/vmem.c | 2 + 24 files changed, 334 insertions(+), 188 deletions(-) create mode 100644 arch/s390/boot/text_dma.S diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 4df43e83363a5..88932c25ad26c 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -30,7 +30,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += ctype.o +obj-y += ctype.o text_dma.o obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 1c8ef393c6569..112b8d9f1e4cd 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -33,6 +33,27 @@ SECTIONS *(.data.*) _edata = . ; } + /* + * .dma section for code, data, ex_table that need to stay below 2 GB, + * even when the kernel is relocated above 2 GB. + */ + _sdma = .; + .dma.text : { + . = ALIGN(PAGE_SIZE); + _stext_dma = .; + *(.dma.text) + . = ALIGN(PAGE_SIZE); + _etext_dma = .; + } + .
= ALIGN(16); + .dma.ex_table : { + _start_dma_ex_table = .; + KEEP(*(.dma.ex_table)) + _stop_dma_ex_table = .; + } + .dma.data : { *(.dma.data) } + _edma = .; + BOOT_DATA BOOT_DATA_PRESERVED diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index c6b4b4c5dd585..028aab03a9e77 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -305,7 +305,7 @@ ENTRY(startup_kdump) xc 0x300(256),0x300 xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 - lctlg %c0,%c15,0x200(%r0) # initialize control registers + lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers stcke __LC_BOOT_CLOCK mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 spt 6f-.LPG0(%r13) @@ -319,6 +319,36 @@ ENTRY(startup_kdump) .align 8 6: .long 0x7fffffff,0xffffffff +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad .Lduct # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0xffff # cr4: instruction authorization + .quad .Lduct # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad .Llinkage_stack # cr15: linkage stack operations + + .section .dma.data,"aw",@progbits +.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 + .long 0,0,0,0,0,0,0,0 +.Llinkage_stack: + .long 0,0,0x89000000,0,0,0,0x8a000000,0 + .align 64 +.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 + .align 128 +.Lduald:.rept 8 + .long 0x80000000,0,0,0 # invalid access-list entries + .endr + .previous + #include "head_kdump.S" # diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index b7d6a76cb5e97..e3f339d248cea 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include +#include #include #include "compressed/decompressor.h" #include "boot.h" @@ -11,6 +13,43 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .dma section, and its location is passed + * over to the decompressed / relocated kernel via the .boot.preserved.data + * section. 
+ */ +extern char _sdma[], _edma[]; +extern char _stext_dma[], _etext_dma[]; +extern struct exception_table_entry _start_dma_ex_table[]; +extern struct exception_table_entry _stop_dma_ex_table[]; +unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma); +unsigned long __bootdata_preserved(__edma) = __pa(&_edma); +unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma); +unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma); +struct exception_table_entry * + __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table; +struct exception_table_entry * + __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table; + +int _diag210_dma(struct diag210 *addr); +int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode); +int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode); +void _diag0c_dma(struct hypfs_diag0c_entry *entry); +void _diag308_reset_dma(void); +struct diag_ops __bootdata_preserved(diag_dma_ops) = { + .diag210 = _diag210_dma, + .diag26c = _diag26c_dma, + .diag14 = _diag14_dma, + .diag0c = _diag0c_dma, + .diag308_reset = _diag308_reset_dma +}; +static struct diag210 _diag210_tmp_dma __section(".dma.data"); +struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; +void _swsusp_reset_dma(void); +unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma); + void error(char *x) { sclp_early_printk("\n\n"); diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S new file mode 100644 index 0000000000000..2414360ff22da --- /dev/null +++ b/arch/s390/boot/text_dma.S @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Code that needs to run below 2 GB. + * + * Copyright IBM Corp. 2019 + */ + +#include +#include +#include + + .section .dma.text,"ax" +/* + * Simplified version of expoline thunk. The normal thunks can not be used here, + * because they might be more than 2 GB away, and not reachable by the relative + * branch. No comdat, exrl, etc. optimizations used here, because it only + * affects a few functions that are not performance-relevant. + */ + .macro BR_EX_DMA_r14 + larl %r1,0f + ex 0,0(%r1) + j . 
+0: br %r14 + .endm + +/* + * int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode) + */ +ENTRY(_diag14_dma) + lgr %r1,%r2 + lgr %r2,%r3 + lgr %r3,%r4 + lhi %r5,-EIO + sam31 + diag %r1,%r2,0x14 +.Ldiag14_ex: + ipm %r5 + srl %r5,28 +.Ldiag14_fault: + sam64 + lgfr %r2,%r5 + BR_EX_DMA_r14 + EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault) + +/* + * int _diag210_dma(struct diag210 *addr) + */ +ENTRY(_diag210_dma) + lgr %r1,%r2 + lhi %r2,-1 + sam31 + diag %r1,%r0,0x210 +.Ldiag210_ex: + ipm %r2 + srl %r2,28 +.Ldiag210_fault: + sam64 + lgfr %r2,%r2 + BR_EX_DMA_r14 + EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault) + +/* + * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode) + */ +ENTRY(_diag26c_dma) + lghi %r5,-EOPNOTSUPP + sam31 + diag %r2,%r4,0x26c +.Ldiag26c_ex: + sam64 + lgfr %r2,%r5 + BR_EX_DMA_r14 + EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex) + +/* + * void _diag0c_dma(struct hypfs_diag0c_entry *entry) + */ +ENTRY(_diag0c_dma) + sam31 + diag %r2,%r2,0x0c + sam64 + BR_EX_DMA_r14 + +/* + * void _swsusp_reset_dma(void) + */ +ENTRY(_swsusp_reset_dma) + larl %r1,restart_entry + larl %r2,.Lrestart_diag308_psw + og %r1,0(%r2) + stg %r1,0(%r0) + lghi %r0,0 + diag %r0,%r0,0x308 +restart_entry: + lhi %r1,1 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + sam64 + BR_EX_DMA_r14 + +/* + * void _diag308_reset_dma(void) + * + * Calls diag 308 subcode 1 and continues execution + */ +ENTRY(_diag308_reset_dma) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + lg %r2,0(%r4) # Disable lowcore protection + nilh %r2,0xefff + larl %r4,.Lctlreg0 + stg %r2,0(%r4) + lctlg %c0,%c0,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) + larl %r4,.Lprefix # Save prefix register + stpx 0(%r4) + larl %r4,.Lprefix_zero # Set prefix register to 0 + spx 0(%r4) + larl %r4,.Lcontinue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,restart_part2 # Setup restart PSW at absolute 0 + larl %r3,.Lrestart_diag308_psw + og %r4,0(%r3) # Save PSW + lghi %r3,0 + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + lghi %r0,0 + diag %r0,%r1,0x308 +restart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,.Lprefix # Restore prefix register + spx 0(%r4) + larl %r4,.Lcontinue_psw # Restore PSW flags + lpswe 0(%r4) +.Lcontinue: + BR_EX_DMA_r14 + + .section .dma.data,"aw",@progbits +.align 8 +.Lrestart_diag308_psw: + .long 0x00080000,0x80000000 + +.align 8 +.Lcontinue_psw: + .quad 0,.Lcontinue + +.align 8 +.Lctlreg0: + .quad 0 +.Lctlregs: + .rept 16 + .quad 0 + .endr +.Lfpctl: + .long 0 +.Lprefix: + .long 0 +.Lprefix_zero: + .long 0 diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 72e3140fafb50..3235e4d82f2d5 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -15,27 +15,13 @@ #define DBFS_D0C_HDR_VERSION 0 -/* - * Execute diagnose 0c in 31 bit mode - */ -static void diag0c(struct hypfs_diag0c_entry *entry) -{ - diag_stat_inc(DIAG_STAT_X00C); - asm volatile ( - " sam31\n" - " diag %0,%0,0x0c\n" - " sam64\n" - : /* no output register */ - : "a" (entry) - : "memory"); -} - /* * Get hypfs_diag0c_entry from CPU vector and store diag0c data */ static void diag0c_fn(void *data) { - diag0c(((void **) 
data)[smp_processor_id()]); + diag_stat_inc(DIAG_STAT_X00C); + diag_dma_ops.diag0c(((void **) data)[smp_processor_id()]); } /* diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 19562be22b7e3..0036eab14391d 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -308,4 +308,17 @@ union diag318_info { int diag204(unsigned long subcode, unsigned long size, void *addr); int diag224(void *ptr); int diag26c(void *req, void *resp, enum diag26c_sc subcode); + +struct hypfs_diag0c_entry; + +struct diag_ops { + int (*diag210)(struct diag210 *addr); + int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode); + int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode); + void (*diag0c)(struct hypfs_diag0c_entry *entry); + void (*diag308_reset)(void); +}; + +extern struct diag_ops diag_dma_ops; +extern struct diag210 *__diag210_tmp_dma; #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 80a4e5a9cb460..ae27f756b409f 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -19,6 +19,11 @@ struct exception_table_entry int insn, fixup; }; +extern struct exception_table_entry *__start_dma_ex_table; +extern struct exception_table_entry *__stop_dma_ex_table; + +const struct exception_table_entry *s390_search_extables(unsigned long addr); + static inline unsigned long extable_fixup(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 420d39ebdc46b..084e71b7272af 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -127,7 +127,6 @@ enum diag308_rc { }; extern int diag308(unsigned long subcode, void *addr); -extern void diag308_reset(void); extern void store_status(void (*fn)(void *), void *data); extern void lgr_info_log(void); diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 1b95da3fdd64e..7f22262b0e46c 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -28,5 +28,12 @@ .long (_target) - . ; \ .previous +#define EX_TABLE_DMA(_fault, _target) \ + .section .dma.ex_table, "a" ; \ + .align 4 ; \ + .long (_fault) - . ; \ + .long (_target) - . 
; \ + .previous + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 29e55739b516a..af670fa4b12aa 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -23,4 +23,7 @@ */ #define __bootdata_preserved(var) __section(.boot.preserved.data.var) var +extern unsigned long __sdma, __edma; +extern unsigned long __stext_dma, __etext_dma; + #endif diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f268fca67e822..d6ee5978e2731 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -79,71 +79,3 @@ disabled_wait_psw: s390_base_pgm_handler_fn: .quad 0 .previous - -# -# Calls diag 308 subcode 1 and continues execution -# -ENTRY(diag308_reset) - larl %r4,.Lctlregs # Save control registers - stctg %c0,%c15,0(%r4) - lg %r2,0(%r4) # Disable lowcore protection - nilh %r2,0xefff - larl %r4,.Lctlreg0 - stg %r2,0(%r4) - lctlg %c0,%c0,0(%r4) - larl %r4,.Lfpctl # Floating point control register - stfpc 0(%r4) - larl %r4,.Lprefix # Save prefix register - stpx 0(%r4) - larl %r4,.Lprefix_zero # Set prefix register to 0 - spx 0(%r4) - larl %r4,.Lcontinue_psw # Save PSW flags - epsw %r2,%r3 - stm %r2,%r3,0(%r4) - larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 - lghi %r3,0 - lg %r4,0(%r4) # Save PSW - sturg %r4,%r3 # Use sturg, because of large pages - lghi %r1,1 - lghi %r0,0 - diag %r0,%r1,0x308 -.Lrestart_part2: - lhi %r0,0 # Load r0 with zero - lhi %r1,2 # Use mode 2 = ESAME (dump) - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode - sam64 # Switch to 64 bit addressing mode - larl %r4,.Lctlregs # Restore control registers - lctlg %c0,%c15,0(%r4) - larl %r4,.Lfpctl # Restore floating point ctl register - lfpc 0(%r4) - larl %r4,.Lprefix # Restore prefix register - spx 0(%r4) - larl %r4,.Lcontinue_psw # Restore PSW flags - lpswe 0(%r4) -.Lcontinue: - BR_EX %r14 -.align 16 -.Lrestart_psw: - .long 0x00080000,0x80000000 + .Lrestart_part2 - - .section .data..nosave,"aw",@progbits -.align 8 -.Lcontinue_psw: - .quad 0,.Lcontinue - .previous - - .section .bss -.align 8 -.Lctlreg0: - .quad 0 -.Lctlregs: - .rept 16 - .quad 0 - .endr -.Lfpctl: - .long 0 -.Lprefix: - .long 0 -.Lprefix_zero: - .long 0 - .previous diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 7edaa733a77fd..e9dac9a24d3fc 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -13,6 +13,7 @@ #include #include #include +#include struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -49,6 +50,9 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; +struct diag_ops __bootdata_preserved(diag_dma_ops); +struct diag210 *__bootdata_preserved(__diag210_tmp_dma); + static int show_diag_stat(struct seq_file *m, void *v) { struct diag_stat *stat; @@ -139,30 +143,10 @@ EXPORT_SYMBOL(diag_stat_inc_norecursion); /* * Diagnose 14: Input spool file manipulation */ -static inline int __diag14(unsigned long rx, unsigned long ry1, - unsigned long subcode) -{ - register unsigned long _ry1 asm("2") = ry1; - register unsigned long _ry2 asm("3") = subcode; - int rc = 0; - - asm volatile( - " sam31\n" - " diag %2,2,0x14\n" - " sam64\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "+d" (_ry2) - : "d" (rx), "d" (_ry1) - : "cc"); - - return rc; -} - int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) { diag_stat_inc(DIAG_STAT_X014); - return __diag14(rx, ry1, subcode); + return diag_dma_ops.diag14(rx, ry1, 
subcode); } EXPORT_SYMBOL(diag14); @@ -195,30 +179,17 @@ EXPORT_SYMBOL(diag204); */ int diag210(struct diag210 *addr) { - /* - * diag 210 needs its data below the 2GB border, so we - * use a static data area to be sure - */ - static struct diag210 diag210_tmp; static DEFINE_SPINLOCK(diag210_lock); unsigned long flags; int ccode; spin_lock_irqsave(&diag210_lock, flags); - diag210_tmp = *addr; + *__diag210_tmp_dma = *addr; diag_stat_inc(DIAG_STAT_X210); - asm volatile( - " lhi %0,-1\n" - " sam31\n" - " diag %1,0,0x210\n" - "0: ipm %0\n" - " srl %0,28\n" - "1: sam64\n" - EX_TABLE(0b, 1b) - : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); - - *addr = diag210_tmp; + ccode = diag_dma_ops.diag210(__diag210_tmp_dma); + + *addr = *__diag210_tmp_dma; spin_unlock_irqrestore(&diag210_lock, flags); return ccode; @@ -243,27 +214,9 @@ EXPORT_SYMBOL(diag224); /* * Diagnose 26C: Access Certain System Information */ -static inline int __diag26c(void *req, void *resp, enum diag26c_sc subcode) -{ - register unsigned long _req asm("2") = (addr_t) req; - register unsigned long _resp asm("3") = (addr_t) resp; - register unsigned long _subcode asm("4") = subcode; - register unsigned long _rc asm("5") = -EOPNOTSUPP; - - asm volatile( - " sam31\n" - " diag %[rx],%[ry],0x26c\n" - "0: sam64\n" - EX_TABLE(0b,0b) - : "+d" (_rc) - : [rx] "d" (_req), "d" (_resp), [ry] "d" (_subcode) - : "cc", "memory"); - return _rc; -} - int diag26c(void *req, void *resp, enum diag26c_sc subcode) { diag_stat_inc(DIAG_STAT_X26C); - return __diag26c(req, resp, subcode); + return diag_dma_ops.diag26c(req, resp, subcode); } EXPORT_SYMBOL(diag26c); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ab09ada0e930d..33f704c16bd33 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -139,7 +139,7 @@ static void early_pgm_check_handler(void) unsigned long addr; addr = S390_lowcore.program_old_psw.addr; - fixup = search_exception_tables(addr); + fixup = s390_search_extables(addr); if (!fixup) disabled_wait(0); /* Disable low address protection before storing into lowcore. 
*/ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 56491e636eabc..5aea1a5274430 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,7 +26,6 @@ ENTRY(startup_continue) 0: larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base - lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers larl %r0,boot_vdso_data stg %r0,__LC_VDSO_PER_CPU # @@ -61,22 +60,6 @@ ENTRY(startup_continue) .align 16 .LPG1: -.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space - .quad 0 # cr1: primary space segment table - .quad .Lduct # cr2: dispatchable unit control table - .quad 0 # cr3: instruction authorization - .quad 0xffff # cr4: instruction authorization - .quad .Lduct # cr5: primary-aste origin - .quad 0 # cr6: I/O interrupts - .quad 0 # cr7: secondary space segment table - .quad 0 # cr8: access registers translation - .quad 0 # cr9: tracing off - .quad 0 # cr10: tracing off - .quad 0 # cr11: tracing off - .quad 0 # cr12: tracing off - .quad 0 # cr13: home space segment table - .quad 0xc0000000 # cr14: machine check handling off - .quad .Llinkage_stack # cr15: linkage stack operations .Lpcmsk:.quad 0x0000000180000000 .L4malign:.quad 0xffffffffffc00000 .Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 @@ -84,14 +67,5 @@ ENTRY(startup_continue) .Lparmaddr: .quad PARMAREA .align 64 -.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 - .long 0,0,0,0,0,0,0,0 -.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 - .align 128 -.Lduald:.rept 8 - .long 0x80000000,0,0,0 # invalid access-list entries - .endr -.Llinkage_stack: - .long 0,0,0x89000000,0,0,0,0x8a000000,0 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index e123c0df83f1c..aa8fe768640e4 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1720,7 +1720,7 @@ void s390_reset_system(void) /* Disable lowcore protection */ __ctl_clear_bit(0, 28); - diag308_reset(); + diag_dma_ops.diag308_reset(); } #ifdef CONFIG_KEXEC_FILE diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 263426dcf97dd..6f1388391620a 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -573,7 +573,7 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. 
*/ - entry = search_exception_tables(regs->psw.addr); + entry = s390_search_extables(regs->psw.addr); if (entry) { regs->psw.addr = extable_fixup(entry); return 1; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index cb582649aba6b..4b998d639c32e 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -253,6 +253,8 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); + vmcoreinfo_append_str("SDMA=%lx\n", __sdma); + vmcoreinfo_append_str("EDMA=%lx\n", __edma); } void machine_shutdown(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 94efb1eb34b60..4ccaf5ed96ee9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -101,6 +101,14 @@ unsigned long __bootdata(memory_end); unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); +struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); +struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); +unsigned long __bootdata_preserved(__swsusp_reset_dma); +unsigned long __bootdata_preserved(__stext_dma); +unsigned long __bootdata_preserved(__etext_dma); +unsigned long __bootdata_preserved(__sdma); +unsigned long __bootdata_preserved(__edma); + unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -832,6 +840,7 @@ static void __init reserve_kernel(void) memblock_reserve(0, HEAD_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); + memblock_reserve(__sdma, __edma - __sdma); } static void __init setup_memory(void) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index bd197baf1dc33..88634fb0cc50b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -689,7 +689,7 @@ void __init smp_save_dump_cpus(void) smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } memblock_free(page, PAGE_SIZE); - diag308_reset(); + diag_dma_ops.diag308_reset(); pcpu_set_smt(0); } #endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 993100c31d655..f5219ce11cc5b 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -154,20 +154,13 @@ ENTRY(swsusp_arch_resume) ptlb /* flush tlb */ /* Reset System */ - larl %r1,restart_entry - larl %r2,.Lrestart_diag308_psw - og %r1,0(%r2) - stg %r1,0(%r0) larl %r1,.Lnew_pgm_check_psw epsw %r2,%r3 stm %r2,%r3,0(%r1) mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1) - lghi %r0,0 - diag %r0,%r0,0x308 -restart_entry: - lhi %r1,1 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - sam64 + larl %r1,__swsusp_reset_dma + lg %r1,0(%r1) + BASR_EX %r14,%r1 #ifdef CONFIG_SMP larl %r1,smp_cpu_mt_shift icm %r1,15,0(%r1) @@ -275,8 +268,6 @@ restore_registers: .Lpanic_string: .asciz "Resume not possible because suspend CPU is no longer available\n" .align 8 -.Lrestart_diag308_psw: - .long 0x00080000,0x80000000 .Lrestart_suspend_psw: .quad 0x0000000180000000,restart_suspend .Lnew_pgm_check_psw: diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8003b38c1688f..82e81a9f71126 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -49,7 +49,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; - fixup = search_exception_tables(regs->psw.addr); + fixup = s390_search_extables(regs->psw.addr); if 
(fixup) regs->psw.addr = extable_fixup(fixup); else { @@ -263,5 +263,6 @@ NOKPROBE_SYMBOL(kernel_stack_overflow); void __init trap_init(void) { + sort_extable(__start_dma_ex_table, __stop_dma_ex_table); local_mcck_enable(); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 11613362c4e75..c220399ae196e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -247,12 +247,24 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) current); } +const struct exception_table_entry *s390_search_extables(unsigned long addr) +{ + const struct exception_table_entry *fixup; + + fixup = search_extable(__start_dma_ex_table, + __stop_dma_ex_table - __start_dma_ex_table, + addr); + if (!fixup) + fixup = search_exception_tables(addr); + return fixup; +} + static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->psw.addr); + fixup = s390_search_extables(regs->psw.addr); if (fixup) { regs->psw.addr = extable_fixup(fixup); return; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 0472e27febdfa..b403fa14847dc 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -413,6 +413,8 @@ void __init vmem_map_init(void) __set_memory((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, + SET_MEMORY_RO | SET_MEMORY_X); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } -- GitLab From b2d24b97b2a9691351920e700bfda4368c177232 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sun, 3 Feb 2019 21:37:20 +0100 Subject: [PATCH 1674/1861] s390/kernel: add support for kernel address space layout randomization (KASLR) This patch adds support for relocating the kernel to a random address. The random kernel offset is obtained from cpacf, using either TRNG, PRNO, or KMC_PRNG, depending on supported MSA level. KERNELOFFSET is added to vmcoreinfo, for crash --kaslr support. Signed-off-by: Gerald Schaefer Reviewed-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 10 +++ arch/s390/boot/Makefile | 1 + arch/s390/boot/boot.h | 3 + arch/s390/boot/ipl_parm.c | 15 +++- arch/s390/boot/kaslr.c | 144 +++++++++++++++++++++++++++++++ arch/s390/boot/startup.c | 31 ++++++- arch/s390/include/asm/setup.h | 6 ++ arch/s390/kernel/machine_kexec.c | 1 + arch/s390/kernel/setup.c | 1 + 9 files changed, 207 insertions(+), 5 deletions(-) create mode 100644 arch/s390/boot/kaslr.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4c99e4f5f3661..8c15392ee9854 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -637,6 +637,16 @@ config RELOCATABLE The relocations make the kernel image about 15% larger (compressed 10%), but are discarded at runtime. +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image (KASLR)" + depends on RELOCATABLE + default y + help + In support of Kernel Address Space Layout Randomization (KASLR), + this randomizes the address at which the kernel image is loaded, + as a security feature that deters exploit attempts relying on + knowledge of the location of kernel internals. 
+ endmenu menu "Memory setup" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 88932c25ad26c..cb40317dc3f73 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -33,6 +33,7 @@ obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += ctype.o text_dma.o obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o +obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index ca395fcff15ef..ad57c2205a71b 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -9,6 +9,9 @@ void setup_boot_command_line(void); void parse_boot_command_line(void); void setup_memory_end(void); void print_missing_facilities(void); +unsigned long get_random_base(unsigned long safe_addr); + +extern int kaslr_enabled; unsigned long read_ipl_report(unsigned long safe_offset); diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 96777092fb14d..b49bd97d33af8 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -18,6 +18,8 @@ unsigned long __bootdata(memory_end); int __bootdata(memory_end_set); int __bootdata(noexec_disabled); +int kaslr_enabled __section(.data); + static inline int __diag308(unsigned long subcode, void *addr) { register unsigned long _addr asm("0") = (unsigned long)addr; @@ -214,6 +216,7 @@ void parse_boot_command_line(void) char *args; int rc; + kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); args = strcpy(command_line_buf, early_command_line); while (*args) { args = next_arg(args, ¶m, &val); @@ -231,15 +234,21 @@ void parse_boot_command_line(void) if (!strcmp(param, "facilities")) modify_fac_list(val); + + if (!strcmp(param, "nokaslr")) + kaslr_enabled = 0; } } void setup_memory_end(void) { #ifdef CONFIG_CRASH_DUMP - if (!OLDMEM_BASE && ipl_block_valid && - ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) { + if (OLDMEM_BASE) { + kaslr_enabled = 0; + } else if (ipl_block_valid && + ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) { + kaslr_enabled = 0; if (!sclp_early_get_hsa_size(&memory_end) && memory_end) memory_end_set = 1; } diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c new file mode 100644 index 0000000000000..3bdd8132e56bc --- /dev/null +++ b/arch/s390/boot/kaslr.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2019 + */ +#include +#include +#include +#include +#include "compressed/decompressor.h" + +#define PRNG_MODE_TDES 1 +#define PRNG_MODE_SHA512 2 +#define PRNG_MODE_TRNG 3 + +struct prno_parm { + u32 res; + u32 reseed_counter; + u64 stream_bytes; + u8 V[112]; + u8 C[112]; +}; + +struct prng_parm { + u8 parm_block[32]; + u32 reseed_counter; + u64 byte_counter; +}; + +static int check_prng(void) +{ + if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { + sclp_early_printk("KASLR disabled: CPU has no PRNG\n"); + return 0; + } + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + return PRNG_MODE_TRNG; + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) + return PRNG_MODE_SHA512; + else + return PRNG_MODE_TDES; +} + +static unsigned long get_random(unsigned long limit) +{ + struct prng_parm prng = { + /* initial parameter block for tdes mode, copied from libica */ + .parm_block = { + 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52, + 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4, + 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF, + 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 + }, + }; + unsigned long seed, random; + struct prno_parm prno; + __u64 entropy[4]; + int mode, i; + + mode = check_prng(); + seed = get_tod_clock_fast(); + switch (mode) { + case PRNG_MODE_TRNG: + cpacf_trng(NULL, 0, (u8 *) &random, sizeof(random)); + break; + case PRNG_MODE_SHA512: + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &prno, NULL, 0, + (u8 *) &seed, sizeof(seed)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prno, (u8 *) &random, + sizeof(random), NULL, 0); + break; + case PRNG_MODE_TDES: + /* add entropy */ + *(unsigned long *) prng.parm_block ^= seed; + for (i = 0; i < 16; i++) { + cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, + (char *) entropy, (char *) entropy, + sizeof(entropy)); + memcpy(prng.parm_block, entropy, sizeof(entropy)); + } + random = seed; + cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, (u8 *) &random, + (u8 *) &random, sizeof(random)); + break; + default: + random = 0; + } + return random % limit; +} + +unsigned long get_random_base(unsigned long safe_addr) +{ + unsigned long base, start, end, kernel_size; + unsigned long block_sum, offset; + int i; + + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { + if (safe_addr < INITRD_START + INITRD_SIZE) + safe_addr = INITRD_START + INITRD_SIZE; + } + safe_addr = ALIGN(safe_addr, THREAD_SIZE); + + kernel_size = vmlinux.image_size + vmlinux.bss_size; + block_sum = 0; + for_each_mem_detect_block(i, &start, &end) { + if (memory_end_set) { + if (start >= memory_end) + break; + if (end > memory_end) + end = memory_end; + } + if (end - start < kernel_size) + continue; + block_sum += end - start - kernel_size; + } + if (!block_sum) { + sclp_early_printk("KASLR disabled: not enough memory\n"); + return 0; + } + + base = get_random(block_sum); + if (base == 0) + return 0; + if (base < safe_addr) + base = safe_addr; + block_sum = offset = 0; + for_each_mem_detect_block(i, &start, &end) { + if (memory_end_set) { + if (start >= memory_end) + break; + if (end > memory_end) + end = memory_end; + } + if (end - start < kernel_size) + continue; + block_sum += end - start - kernel_size; + if (base <= block_sum) { + base = start + base - offset; + base = ALIGN_DOWN(base, THREAD_SIZE); + break; + } + offset = block_sum; + } + return base; +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e3f339d248cea..4401e992bda1e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -12,6 +12,7 @@ extern char 
__boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; +unsigned long __bootdata_preserved(__kaslr_offset); /* * Some code and data needs to stay below 2 GB, even when the kernel would be @@ -113,6 +114,7 @@ static void handle_relocs(unsigned long offset) void startup_kernel(void) { + unsigned long random_lma; unsigned long safe_addr; void *img; @@ -126,12 +128,37 @@ void startup_kernel(void) parse_boot_command_line(); setup_memory_end(); detect_memory(); + + random_lma = __kaslr_offset = 0; + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { + random_lma = get_random_base(safe_addr); + if (random_lma) { + __kaslr_offset = random_lma - vmlinux.default_lma; + img = (void *)vmlinux.default_lma; + vmlinux.default_lma += __kaslr_offset; + vmlinux.entry += __kaslr_offset; + vmlinux.bootdata_off += __kaslr_offset; + vmlinux.bootdata_preserved_off += __kaslr_offset; + vmlinux.rela_dyn_start += __kaslr_offset; + vmlinux.rela_dyn_end += __kaslr_offset; + vmlinux.dynsym_start += __kaslr_offset; + } + } + if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); - } + } else if (__kaslr_offset) + memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + copy_bootdata(); if (IS_ENABLED(CONFIG_RELOCATABLE)) - handle_relocs(0); + handle_relocs(__kaslr_offset); + + if (__kaslr_offset) { + /* Clear non-relocated kernel */ + if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) + memset(img, 0, vmlinux.image_size); + } vmlinux.entry(); } diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d202756d62912..925889d360c1d 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -147,6 +147,12 @@ extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); extern void (*_machine_power_off)(void); +extern unsigned long __kaslr_offset; +static inline unsigned long kaslr_offset(void) +{ + return __kaslr_offset; +} + #else /* __ASSEMBLY__ */ #define IPL_DEVICE (IPL_DEVICE_OFFSET) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 4b998d639c32e..e2ba7b7f574ef 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -255,6 +255,7 @@ void arch_crash_save_vmcoreinfo(void) mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); vmcoreinfo_append_str("SDMA=%lx\n", __sdma); vmcoreinfo_append_str("EDMA=%lx\n", __edma); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); } void machine_shutdown(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4ccaf5ed96ee9..64e4bc9dd1301 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -108,6 +108,7 @@ unsigned long __bootdata_preserved(__stext_dma); unsigned long __bootdata_preserved(__etext_dma); unsigned long __bootdata_preserved(__sdma); unsigned long __bootdata_preserved(__edma); +unsigned long __bootdata_preserved(__kaslr_offset); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); -- GitLab From 7a5da02de8d6eafba99556f8c98e5313edebb449 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 18 Apr 2019 16:24:50 +0200 Subject: [PATCH 1675/1861] locking/lockdep: check for freed initmem in static_obj() The following warning occurred on s390: WARNING: CPU: 0 PID: 804 at kernel/locking/lockdep.c:1025 lockdep_register_key+0x30/0x150 This is because the check in static_obj() assumes that all memory within [_stext, _end] belongs 
to static objects, which at least for s390 isn't true. The init section is also part of this range, and freeing it allows the buddy allocator to allocate memory from it. We have virt == phys for the kernel on s390, so that such allocations would then have addresses within the range [_stext, _end]. To fix this, introduce arch_is_kernel_initmem_freed(), similar to arch_is_kernel_text/data(), and add it to the checks in static_obj(). This will always return 0 on architectures that do not define arch_is_kernel_initmem_freed. On s390, it will return 1 if initmem has been freed and the address is in the range [__init_begin, __init_end]. Signed-off-by: Gerald Schaefer Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sections.h | 12 ++++++++++++ arch/s390/mm/init.c | 3 +++ include/asm-generic/sections.h | 14 ++++++++++++++ kernel/locking/lockdep.c | 3 +++ 4 files changed, 32 insertions(+) diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index af670fa4b12aa..42de04ad9c07b 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -2,8 +2,20 @@ #ifndef _S390_SECTIONS_H #define _S390_SECTIONS_H +#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed + #include +extern bool initmem_freed; + +static inline int arch_is_kernel_initmem_freed(unsigned long addr) +{ + if (!initmem_freed) + return 0; + return addr >= (unsigned long)__init_begin && + addr < (unsigned long)__init_end; +} + /* * .boot.data section contains variables "shared" between the decompressor and * the decompressed kernel. The decompressor will store values in them, and diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 3e82f66d5c613..7cf48eefec8fc 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -49,6 +49,8 @@ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); +bool initmem_freed; + static void __init setup_zero_pages(void) { unsigned int order; @@ -148,6 +150,7 @@ void __init mem_init(void) void free_initmem(void) { + initmem_freed = true; __set_memory((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RW | SET_MEMORY_NX); diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d79abca81a52a..d1779d442aa51 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -77,6 +77,20 @@ static inline int arch_is_kernel_data(unsigned long addr) } #endif +/* + * Check if an address is part of freed initmem. This is needed on architectures + * with virt == phys kernel mapping, for code that wants to check if an address + * is part of a static object within [_stext, _end]. After initmem is freed, + * memory can be allocated from it, and such allocations would then have + * addresses within the range [_stext, _end]. + */ +#ifndef arch_is_kernel_initmem_freed +static inline int arch_is_kernel_initmem_freed(unsigned long addr) +{ + return 0; +} +#endif + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 34cdcbedda492..22a99530983e6 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -649,6 +649,9 @@ static int static_obj(const void *obj) end = (unsigned long) &_end, addr = (unsigned long) obj; + if (arch_is_kernel_initmem_freed(addr)) + return 0; + /* * static variable? 
*/ -- GitLab From 8db82563451f976597ab7b282ec655e4390a4088 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 8 Mar 2019 17:47:10 +0100 Subject: [PATCH 1676/1861] cpufreq: armada-37xx: fix frequency calculation for opp The frequency calculation was based on the current (max) frequency of the CPU. However, for low frequencies, the value used was already the parent frequency divided by a factor of 2. Instead of using this frequency, this fix directly gets the frequency from the parent clock. Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx") Cc: Reported-by: Christian Neubert Signed-off-by: Gregory CLEMENT Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 75491fc841a6b..0df16eb1eb3ce 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -359,11 +359,11 @@ static int __init armada37xx_cpufreq_driver_init(void) struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; unsigned long freq; - unsigned int cur_frequency; + unsigned int cur_frequency, base_frequency; struct regmap *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; - struct clk *clk; + struct clk *clk, *parent; nb_pm_base = syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); @@ -399,6 +399,22 @@ static int __init armada37xx_cpufreq_driver_init(void) return PTR_ERR(clk); } + parent = clk_get_parent(clk); + if (IS_ERR(parent)) { + dev_err(cpu_dev, "Cannot get parent clock for CPU0\n"); + clk_put(clk); + return PTR_ERR(parent); + } + + /* Get parent CPU frequency */ + base_frequency = clk_get_rate(parent); + + if (!base_frequency) { + dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n"); + clk_put(clk); + return -EINVAL; + } + /* Get nominal (current) CPU frequency */ cur_frequency = clk_get_rate(clk); if (!cur_frequency) { @@ -431,7 +447,7 @@ static int __init armada37xx_cpufreq_driver_init(void) for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000; - freq = cur_frequency / dvfs->divider[load_lvl]; + freq = base_frequency / dvfs->divider[load_lvl]; ret = dev_pm_opp_add(cpu_dev, freq, u_volt); if (ret) goto remove_opp; -- GitLab From 3d9a8072915366b5932beeed97f158f8d4955768 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:44:54 +0200 Subject: [PATCH 1677/1861] tracing: Cleanup stack trace code - Remove the extra array member of stack_dump_trace[] along with the ARRAY_SIZE - 1 initialization for struct stack_trace :: max_entries. Both are historical leftovers of no value. The stack tracer never exceeds the array and there is no extra storage requirement either; a condensed before/after sketch follows this list. - Make variables which are only used in trace_stack.c static. - Simplify the enable/disable logic. - Rename stack_trace_print() as it's using the stack_trace_ namespace. Free the name up for stack trace related functions.
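For reference, here is a condensed before/after sketch of the declarations touched by the first item, assembled from the hunks below (this summarizes the net effect, it is not an additional hunk):

	/* before: a spare array slot, with max_entries kept one short */
	static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES + 1];
	struct stack_trace stack_trace_max = {
		.max_entries	= STACK_TRACE_ENTRIES - 1,
		.entries	= &stack_dump_trace[0],
	};

	/* after: the array size and max_entries simply agree */
	static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES];
	struct stack_trace stack_trace_max = {
		.max_entries	= STACK_TRACE_ENTRIES,
		.entries	= &stack_dump_trace[0],
	};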
Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.230654524@linutronix.de --- include/linux/ftrace.h | 18 ++++------------ kernel/trace/trace_stack.c | 42 ++++++++++++-------------------------- 2 files changed, 17 insertions(+), 43 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 730876187344a..20899919ead8a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -241,21 +241,11 @@ static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { #ifdef CONFIG_STACK_TRACER -#define STACK_TRACE_ENTRIES 500 - -struct stack_trace; - -extern unsigned stack_trace_index[]; -extern struct stack_trace stack_trace_max; -extern unsigned long stack_trace_max_size; -extern arch_spinlock_t stack_trace_max_lock; - extern int stack_tracer_enabled; -void stack_trace_print(void); -int -stack_trace_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + +int stack_trace_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); /* DO NOT MODIFY THIS VARIABLE DIRECTLY! */ DECLARE_PER_CPU(int, disable_stack_tracer); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c6e54ff25caea..4efda5f75a0f3 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -18,30 +18,26 @@ #include "trace.h" -static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES + 1]; -unsigned stack_trace_index[STACK_TRACE_ENTRIES]; +#define STACK_TRACE_ENTRIES 500 + +static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES]; +static unsigned stack_trace_index[STACK_TRACE_ENTRIES]; -/* - * Reserve one entry for the passed in ip. This will allow - * us to remove most or all of the stack size overhead - * added by the stack tracer itself. - */ struct stack_trace stack_trace_max = { - .max_entries = STACK_TRACE_ENTRIES - 1, + .max_entries = STACK_TRACE_ENTRIES, .entries = &stack_dump_trace[0], }; -unsigned long stack_trace_max_size; -arch_spinlock_t stack_trace_max_lock = +static unsigned long stack_trace_max_size; +static arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; DEFINE_PER_CPU(int, disable_stack_tracer); static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; -static int last_stack_tracer_enabled; -void stack_trace_print(void) +static void print_max_stack(void) { long i; int size; @@ -61,16 +57,7 @@ void stack_trace_print(void) } } -/* - * When arch-specific code overrides this function, the following - * data should be filled up, assuming stack_trace_max_lock is held to - * prevent concurrent updates. 
- * stack_trace_index[] - * stack_trace_max - * stack_trace_max_size - */ -void __weak -check_stack(unsigned long ip, unsigned long *stack) +static void check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; static int tracer_frame; @@ -179,7 +166,7 @@ check_stack(unsigned long ip, unsigned long *stack) stack_trace_max.nr_entries = x; if (task_stack_end_corrupted(current)) { - stack_trace_print(); + print_max_stack(); BUG(); } @@ -412,23 +399,21 @@ stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int was_enabled; int ret; mutex_lock(&stack_sysctl_mutex); + was_enabled = !!stack_tracer_enabled; ret = proc_dointvec(table, write, buffer, lenp, ppos); - if (ret || !write || - (last_stack_tracer_enabled == !!stack_tracer_enabled)) + if (ret || !write || (was_enabled == !!stack_tracer_enabled)) goto out; - last_stack_tracer_enabled = !!stack_tracer_enabled; - if (stack_tracer_enabled) register_ftrace_function(&trace_ops); else unregister_ftrace_function(&trace_ops); - out: mutex_unlock(&stack_sysctl_mutex); return ret; @@ -444,7 +429,6 @@ static __init int enable_stacktrace(char *str) strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE); stack_tracer_enabled = 1; - last_stack_tracer_enabled = 1; return 1; } __setup("stacktrace", enable_stacktrace); -- GitLab From e9b98e162aa53cbea7c8b0d6c9d5dc6e0f822b9c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:44:55 +0200 Subject: [PATCH 1678/1861] stacktrace: Provide helpers for common stack trace operations All operations with stack traces are based on struct stack_trace. That's a horrible construct as the struct is a kitchen sink for input and output. Quite some usage sites embed it into their own data structures which creates weird indirections. There is absolutely no point in doing so. For all use cases a storage array and the number of valid stack trace entries in the array is sufficient. Provide helper functions which avoid the struct stack_trace indirection so the usage sites can be cleaned up. 
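As a minimal sketch of what the conversion looks like at a usage site (the variable names are illustrative, not taken from any particular caller):

	unsigned long entries[16];
	unsigned int nr_entries;

	/* before: fill in the struct stack_trace kitchen sink */
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 0,
	};
	save_stack_trace(&trace);
	print_stack_trace(&trace, 0);

	/* after: a plain storage array plus an entry count */
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	stack_trace_print(entries, nr_entries, 0);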
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.324810708@linutronix.de --- include/linux/stacktrace.h | 27 ++++++ kernel/stacktrace.c | 170 +++++++++++++++++++++++++++++++++---- 2 files changed, 182 insertions(+), 15 deletions(-) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index ba29a0613e66f..a24340b3e9e14 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -3,11 +3,26 @@ #define __LINUX_STACKTRACE_H #include +#include struct task_struct; struct pt_regs; #ifdef CONFIG_STACKTRACE +void stack_trace_print(unsigned long *trace, unsigned int nr_entries, + int spaces); +int stack_trace_snprint(char *buf, size_t size, unsigned long *entries, + unsigned int nr_entries, int spaces); +unsigned int stack_trace_save(unsigned long *store, unsigned int size, + unsigned int skipnr); +unsigned int stack_trace_save_tsk(struct task_struct *task, + unsigned long *store, unsigned int size, + unsigned int skipnr); +unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, + unsigned int size, unsigned int skipnr); +unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); + +/* Internal interfaces. 
Do not use in generic code */ struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; @@ -41,4 +56,16 @@ extern void save_stack_trace_user(struct stack_trace *trace); # define save_stack_trace_tsk_reliable(tsk, trace) ({ -ENOSYS; }) #endif /* CONFIG_STACKTRACE */ +#if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE) +int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store, + unsigned int size); +#else +static inline int stack_trace_save_tsk_reliable(struct task_struct *tsk, + unsigned long *store, + unsigned int size) +{ + return -ENOSYS; +} +#endif + #endif /* __LINUX_STACKTRACE_H */ diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index f8edee9c792de..b38333b3bc181 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -11,35 +11,54 @@ #include #include -void print_stack_trace(struct stack_trace *trace, int spaces) +/** + * stack_trace_print - Print the entries in the stack trace + * @entries: Pointer to storage array + * @nr_entries: Number of entries in the storage array + * @spaces: Number of leading spaces to print + */ +void stack_trace_print(unsigned long *entries, unsigned int nr_entries, + int spaces) { - int i; + unsigned int i; - if (WARN_ON(!trace->entries)) + if (WARN_ON(!entries)) return; - for (i = 0; i < trace->nr_entries; i++) - printk("%*c%pS\n", 1 + spaces, ' ', (void *)trace->entries[i]); + for (i = 0; i < nr_entries; i++) + printk("%*c%pS\n", 1 + spaces, ' ', (void *)entries[i]); +} +EXPORT_SYMBOL_GPL(stack_trace_print); + +void print_stack_trace(struct stack_trace *trace, int spaces) +{ + stack_trace_print(trace->entries, trace->nr_entries, spaces); } EXPORT_SYMBOL_GPL(print_stack_trace); -int snprint_stack_trace(char *buf, size_t size, - struct stack_trace *trace, int spaces) +/** + * stack_trace_snprint - Print the entries in the stack trace into a buffer + * @buf: Pointer to the print buffer + * @size: Size of the print buffer + * @entries: Pointer to storage array + * @nr_entries: Number of entries in the storage array + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed. 
+ */ +int stack_trace_snprint(char *buf, size_t size, unsigned long *entries, + unsigned int nr_entries, int spaces) { - int i; - int generated; - int total = 0; + unsigned int generated, i, total = 0; - if (WARN_ON(!trace->entries)) + if (WARN_ON(!entries)) return 0; - for (i = 0; i < trace->nr_entries; i++) { + for (i = 0; i < nr_entries && size; i++) { generated = snprintf(buf, size, "%*c%pS\n", 1 + spaces, ' ', - (void *)trace->entries[i]); + (void *)entries[i]); total += generated; - - /* Assume that generated isn't a negative number */ if (generated >= size) { buf += size; size = 0; @@ -51,6 +70,14 @@ int snprint_stack_trace(char *buf, size_t size, return total; } +EXPORT_SYMBOL_GPL(stack_trace_snprint); + +int snprint_stack_trace(char *buf, size_t size, + struct stack_trace *trace, int spaces) +{ + return stack_trace_snprint(buf, size, trace->entries, + trace->nr_entries, spaces); +} EXPORT_SYMBOL_GPL(snprint_stack_trace); /* @@ -77,3 +104,116 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk, WARN_ONCE(1, KERN_INFO "save_stack_tsk_reliable() not implemented yet.\n"); return -ENOSYS; } + +/** + * stack_trace_save - Save a stack trace into a storage array + * @store: Pointer to storage array + * @size: Size of the storage array + * @skipnr: Number of entries to skip at the start of the stack trace + * + * Return: Number of trace entries stored + */ +unsigned int stack_trace_save(unsigned long *store, unsigned int size, + unsigned int skipnr) +{ + struct stack_trace trace = { + .entries = store, + .max_entries = size, + .skip = skipnr + 1, + }; + + save_stack_trace(&trace); + return trace.nr_entries; +} +EXPORT_SYMBOL_GPL(stack_trace_save); + +/** + * stack_trace_save_tsk - Save a task stack trace into a storage array + * @task: The task to examine + * @store: Pointer to storage array + * @size: Size of the storage array + * @skipnr: Number of entries to skip at the start of the stack trace + * + * Return: Number of trace entries stored + */ +unsigned int stack_trace_save_tsk(struct task_struct *task, + unsigned long *store, unsigned int size, + unsigned int skipnr) +{ + struct stack_trace trace = { + .entries = store, + .max_entries = size, + .skip = skipnr + 1, + }; + + save_stack_trace_tsk(task, &trace); + return trace.nr_entries; +} + +/** + * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array + * @regs: Pointer to pt_regs to examine + * @store: Pointer to storage array + * @size: Size of the storage array + * @skipnr: Number of entries to skip at the start of the stack trace + * + * Return: Number of trace entries stored + */ +unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, + unsigned int size, unsigned int skipnr) +{ + struct stack_trace trace = { + .entries = store, + .max_entries = size, + .skip = skipnr, + }; + + save_stack_trace_regs(regs, &trace); + return trace.nr_entries; +} + +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE +/** + * stack_trace_save_tsk_reliable - Save task stack with verification + * @tsk: Pointer to the task to examine + * @store: Pointer to storage array + * @size: Size of the storage array + * + * Return: An error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is + * reliable and returns the number of entries stored. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. 
+ */ +int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store, + unsigned int size) +{ + struct stack_trace trace = { + .entries = store, + .max_entries = size, + }; + int ret = save_stack_trace_tsk_reliable(tsk, &trace); + + return ret ? ret : trace.nr_entries; +} +#endif + +#ifdef CONFIG_USER_STACKTRACE_SUPPORT +/** + * stack_trace_save_user - Save a user space stack trace into a storage array + * @store: Pointer to storage array + * @size: Size of the storage array + * + * Return: Number of trace entries stored + */ +unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) +{ + struct stack_trace trace = { + .entries = store, + .max_entries = size, + }; + + save_stack_trace_user(&trace); + return trace.nr_entries; +} +#endif /* CONFIG_USER_STACKTRACE_SUPPORT */ -- GitLab From c0cfc337264c5e02e0bc79de6b62857999588879 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:44:56 +0200 Subject: [PATCH 1679/1861] lib/stackdepot: Provide functions which operate on plain storage arrays The struct stack_trace indirection in the stack depot functions is a truly pointless exercise which requires horrible code at the callsites. Provide interfaces based on plain storage arrays. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Alexander Potapenko Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.414574828@linutronix.de --- include/linux/stackdepot.h | 4 +++ lib/stackdepot.c | 70 +++++++++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 7978b3e2c1e13..4297c6d2991d2 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -26,7 +26,11 @@ typedef u32 depot_stack_handle_t; struct stack_trace; depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags); +depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int nr_entries, gfp_t gfp_flags); void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace); +unsigned int stack_depot_fetch(depot_stack_handle_t handle, + unsigned long **entries); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index e513459a5601a..e84f8e58495c1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -194,40 +194,60 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +/** + * stack_depot_fetch - Fetch stack entries from a depot + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). + * @entries: Pointer to store the entries address + * + * Return: The number of trace entries for this depot. 
+ */ +unsigned int stack_depot_fetch(depot_stack_handle_t handle, + unsigned long **entries) { union handle_parts parts = { .handle = handle }; void *slab = stack_slabs[parts.slabindex]; size_t offset = parts.offset << STACK_ALLOC_ALIGN; struct stack_record *stack = slab + offset; - trace->nr_entries = trace->max_entries = stack->size; - trace->entries = stack->entries; - trace->skip = 0; + *entries = stack->entries; + return stack->size; +} +EXPORT_SYMBOL_GPL(stack_depot_fetch); + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +{ + unsigned int nent = stack_depot_fetch(handle, &trace->entries); + + trace->max_entries = trace->nr_entries = nent; } EXPORT_SYMBOL_GPL(depot_fetch_stack); /** - * depot_save_stack - save stack in a stack depot. - * @trace - the stacktrace to save. - * @alloc_flags - flags for allocating additional memory if required. + * stack_depot_save - Save a stack trace from an array + * + * @entries: Pointer to storage array + * @nr_entries: Size of the storage array + * @alloc_flags: Allocation gfp flags * - * Returns the handle of the stack struct stored in depot. + * Return: The handle of the stack struct stored in depot */ -depot_stack_handle_t depot_save_stack(struct stack_trace *trace, - gfp_t alloc_flags) +depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags) { - u32 hash; - depot_stack_handle_t retval = 0; struct stack_record *found = NULL, **bucket; - unsigned long flags; + depot_stack_handle_t retval = 0; struct page *page = NULL; void *prealloc = NULL; + unsigned long flags; + u32 hash; - if (unlikely(trace->nr_entries == 0)) + if (unlikely(nr_entries == 0)) goto fast_exit; - hash = hash_stack(trace->entries, trace->nr_entries); + hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & STACK_HASH_MASK]; /* @@ -235,8 +255,8 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace, * The smp_load_acquire() here pairs with smp_store_release() to * |bucket| below. */ - found = find_stack(smp_load_acquire(bucket), trace->entries, - trace->nr_entries, hash); + found = find_stack(smp_load_acquire(bucket), entries, + nr_entries, hash); if (found) goto exit; @@ -264,10 +284,10 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace, spin_lock_irqsave(&depot_lock, flags); - found = find_stack(*bucket, trace->entries, trace->nr_entries, hash); + found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { struct stack_record *new = - depot_alloc_stack(trace->entries, trace->nr_entries, + depot_alloc_stack(entries, nr_entries, hash, &prealloc, alloc_flags); if (new) { new->next = *bucket; @@ -297,4 +317,16 @@ exit: fast_exit: return retval; } +EXPORT_SYMBOL_GPL(stack_depot_save); + +/** + * depot_save_stack - save stack in a stack depot. + * @trace - the stacktrace to save. + * @alloc_flags - flags for allocating additional memory if required. + */ +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, + gfp_t alloc_flags) +{ + return stack_depot_save(trace->entries, trace->nr_entries, alloc_flags); +} EXPORT_SYMBOL_GPL(depot_save_stack); -- GitLab From 1b59562d3ab09dcef188eb8055d05f0336380394 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:44:57 +0200 Subject: [PATCH 1680/1861] backtrace-test: Simplify stack trace handling Replace the indirection through struct stack_trace by using the storage array based interfaces. 
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.501919093@linutronix.de --- kernel/backtracetest.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c index 1323360d90e37..a563c8fdad0d2 100644 --- a/kernel/backtracetest.c +++ b/kernel/backtracetest.c @@ -48,19 +48,14 @@ static void backtrace_test_irq(void) #ifdef CONFIG_STACKTRACE static void backtrace_test_saved(void) { - struct stack_trace trace; unsigned long entries[8]; + unsigned int nr_entries; pr_info("Testing a saved backtrace.\n"); pr_info("The following trace is a kernel self test and not a bug!\n"); - trace.nr_entries = 0; - trace.max_entries = ARRAY_SIZE(entries); - trace.entries = entries; - trace.skip = 0; - - save_stack_trace(&trace); - print_stack_trace(&trace, 0); + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); + stack_trace_print(entries, nr_entries, 0); } #else static void backtrace_test_saved(void) -- GitLab From e988e5ec18d6081efbef645fc2690298ee23a8db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:44:58 +0200 Subject: [PATCH 1681/1861] proc: Simplify task stack retrieval Replace the indirection through struct stack_trace with an invocation of the storage array based interface. 
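The user-visible format is unchanged: reading /proc/<pid>/stack still prints one "[<0>] %pB" line per saved entry. A hypothetical transcript, with purely illustrative frames and offsets:

	# cat /proc/1/stack
	[<0>] ep_poll+0x2aa/0x3c0
	[<0>] do_epoll_wait+0xb0/0xd0
	[<0>] __x64_sys_epoll_wait+0x1a/0x20
	[<0>] do_syscall_64+0x5b/0x180
	[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9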
Signed-off-by: Thomas Gleixner Reviewed-by: Alexey Dobriyan Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Andrew Morton Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.589304463@linutronix.de --- fs/proc/base.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 5569f215fc54c..f179568b4c767 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -407,7 +407,6 @@ static void unlock_trace(struct task_struct *task) static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - struct stack_trace trace; unsigned long *entries; int err; @@ -430,20 +429,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, if (!entries) return -ENOMEM; - trace.nr_entries = 0; - trace.max_entries = MAX_STACK_TRACE_DEPTH; - trace.entries = entries; - trace.skip = 0; - err = lock_trace(task); if (!err) { - unsigned int i; + unsigned int i, nr_entries; - save_stack_trace_tsk(task, &trace); + nr_entries = stack_trace_save_tsk(task, entries, + MAX_STACK_TRACE_DEPTH, 0); - for (i = 0; i < trace.nr_entries; i++) { + for (i = 0; i < nr_entries; i++) { seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); } + unlock_trace(task); } kfree(entries); -- GitLab From f93877214a83e88373b20801c2d671923d03d07d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:44:59 +0200 Subject: [PATCH 1682/1861] latency_top: Simplify stack trace handling Replace the indirection through struct stack_trace with an invocation of the storage array based interface. 
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.683039030@linutronix.de --- kernel/latencytop.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/kernel/latencytop.c b/kernel/latencytop.c index f5a90ab3c6b94..99a5b5f46dc5d 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -141,20 +141,6 @@ account_global_scheduler_latency(struct task_struct *tsk, memcpy(&latency_record[i], lat, sizeof(struct latency_record)); } -/* - * Iterator to store a backtrace into a latency record entry - */ -static inline void store_stacktrace(struct task_struct *tsk, - struct latency_record *lat) -{ - struct stack_trace trace; - - memset(&trace, 0, sizeof(trace)); - trace.max_entries = LT_BACKTRACEDEPTH; - trace.entries = &lat->backtrace[0]; - save_stack_trace_tsk(tsk, &trace); -} - /** * __account_scheduler_latency - record an occurred latency * @tsk - the task struct of the task hitting the latency @@ -191,7 +177,8 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) lat.count = 1; lat.time = usecs; lat.max = usecs; - store_stacktrace(tsk, &lat); + + stack_trace_save_tsk(tsk, lat.backtrace, LT_BACKTRACEDEPTH, 0); raw_spin_lock_irqsave(&latency_lock, flags); -- GitLab From 7971679994d3a239538ddcfea9f89468f0bd65e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:00 +0200 Subject: [PATCH 1683/1861] mm/slub: Simplify stack trace retrieval Replace the indirection through struct stack_trace with an invocation of the storage array based interface. 
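Note that the skip count can stay at 3 even though the call chain changes: stack_trace_save(), introduced earlier in this series, internally sets .skip = skipnr + 1 to hide its own frame, so the extra call level added by the helper is compensated automatically:

	/* old: trace.skip = 3, then a direct save_stack_trace(&trace) */
	/* new: the same 3 works because the helper skips itself */
	nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);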
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Christoph Lameter Cc: Andy Lutomirski Cc: Andrew Morton Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.771410441@linutronix.de --- mm/slub.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index e2ccd12b6faa3..6b28cd2b5a58c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -552,18 +552,14 @@ static void set_track(struct kmem_cache *s, void *object, if (addr) { #ifdef CONFIG_STACKTRACE - struct stack_trace trace; + unsigned int nr_entries; - trace.nr_entries = 0; - trace.max_entries = TRACK_ADDRS_COUNT; - trace.entries = p->addrs; - trace.skip = 3; metadata_access_enable(); - save_stack_trace(&trace); + nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3); metadata_access_disable(); - if (trace.nr_entries < TRACK_ADDRS_COUNT) - p->addrs[trace.nr_entries] = 0; + if (nr_entries < TRACK_ADDRS_COUNT) + p->addrs[nr_entries] = 0; #endif p->addr = addr; p->cpu = smp_processor_id(); -- GitLab From 07984aad1c7ed679596f849c0e7d5e029f76e0eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:01 +0200 Subject: [PATCH 1684/1861] mm/kmemleak: Simplify stacktrace handling Replace the indirection through struct stack_trace by using the storage array based interfaces. 
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Catalin Marinas Cc: Andy Lutomirski Cc: linux-mm@kvack.org Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.863716911@linutronix.de --- mm/kmemleak.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 6c318f5ac234f..d12b35de1e7ed 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -410,11 +410,6 @@ static void print_unreferenced(struct seq_file *seq, */ static void dump_object_info(struct kmemleak_object *object) { - struct stack_trace trace; - - trace.nr_entries = object->trace_len; - trace.entries = object->trace; - pr_notice("Object 0x%08lx (size %zu):\n", object->pointer, object->size); pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", @@ -424,7 +419,7 @@ static void dump_object_info(struct kmemleak_object *object) pr_notice(" flags = 0x%x\n", object->flags); pr_notice(" checksum = %u\n", object->checksum); pr_notice(" backtrace:\n"); - print_stack_trace(&trace, 4); + stack_trace_print(object->trace, object->trace_len, 4); } /* @@ -553,15 +548,7 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali */ static int __save_stack_trace(unsigned long *trace) { - struct stack_trace stack_trace; - - stack_trace.max_entries = MAX_TRACE; - stack_trace.nr_entries = 0; - stack_trace.entries = trace; - stack_trace.skip = 2; - save_stack_trace(&stack_trace); - - return stack_trace.nr_entries; + return stack_trace_save(trace, MAX_TRACE, 2); } /* @@ -2019,13 +2006,8 @@ early_param("kmemleak", kmemleak_boot_config); static void __init print_log_trace(struct early_log *log) { - struct stack_trace trace; - - trace.nr_entries = log->trace_len; - trace.entries = log->trace; - pr_notice("Early log backtrace:\n"); - print_stack_trace(&trace, 2); + stack_trace_print(log->trace, log->trace_len, 2); } /* -- GitLab From 880e049c9ce9020384ce305c71375aa1cb54addb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:02 +0200 Subject: [PATCH 1685/1861] mm/kasan: Simplify stacktrace handling Replace the indirection through struct stack_trace by using the storage array based interfaces. 
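The depot conversion follows the same convention: the array filled by stack_trace_save() feeds stack_depot_save() directly, and stack_depot_fetch() later returns a pointer into the depot's own storage together with a count. A minimal sketch, assuming a depth of 64 and placeholder function names:

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t demo_save_stack(gfp_t flags)
{
        unsigned long entries[64];
        unsigned int nr_entries;

        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        return stack_depot_save(entries, nr_entries, flags);
}

static void demo_print_stack(depot_stack_handle_t handle)
{
        unsigned long *entries;
        unsigned int nr_entries;

        nr_entries = stack_depot_fetch(handle, &entries);
        stack_trace_print(entries, nr_entries, 0);
}
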
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Andy Lutomirski Cc: Alexander Potapenko Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Cc: Steven Rostedt Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Catalin Marinas Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094801.963261479@linutronix.de --- mm/kasan/common.c | 30 ++++++++++++------------------ mm/kasan/report.c | 7 ++++--- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 38e5f20a775ad..303a7379d2a35 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -48,34 +48,28 @@ static inline int in_irqentry_text(unsigned long ptr) ptr < (unsigned long)&__softirqentry_text_end); } -static inline void filter_irq_stacks(struct stack_trace *trace) +static inline unsigned int filter_irq_stacks(unsigned long *entries, + unsigned int nr_entries) { - int i; + unsigned int i; - if (!trace->nr_entries) - return; - for (i = 0; i < trace->nr_entries; i++) - if (in_irqentry_text(trace->entries[i])) { + for (i = 0; i < nr_entries; i++) { + if (in_irqentry_text(entries[i])) { /* Include the irqentry function into the stack. */ - trace->nr_entries = i + 1; - break; + return i + 1; } + } + return nr_entries; } static inline depot_stack_handle_t save_stack(gfp_t flags) { unsigned long entries[KASAN_STACK_DEPTH]; - struct stack_trace trace = { - .nr_entries = 0, - .entries = entries, - .max_entries = KASAN_STACK_DEPTH, - .skip = 0 - }; - - save_stack_trace(&trace); - filter_irq_stacks(&trace); + unsigned int nr_entries; - return depot_save_stack(&trace, flags); + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); + nr_entries = filter_irq_stacks(entries, nr_entries); + return stack_depot_save(entries, nr_entries, flags); } static inline void set_track(struct kasan_track *track, gfp_t flags) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ca9418fe9232a..882d77568e7ee 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -100,10 +100,11 @@ static void print_track(struct kasan_track *track, const char *prefix) { pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { - struct stack_trace trace; + unsigned long *entries; + unsigned int nr_entries; - depot_fetch_stack(track->stack, &trace); - print_stack_trace(&trace, 0); + nr_entries = stack_depot_fetch(track->stack, &entries); + stack_trace_print(entries, nr_entries, 0); } else { pr_err("(stack is not available)\n"); } -- GitLab From af52bf6b92f7d8783c1e712cad6ef7d37cd773b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:03 +0200 Subject: [PATCH 1686/1861] mm/page_owner: Simplify stack trace handling Replace the indirection through struct stack_trace by using the storage array based interfaces. The original code in all printing functions is really wrong. 
It allocates a storage array on stack which is unused because depot_fetch_stack() does not store anything in it. It overwrites the entries pointer in the stack_trace struct so it points to the depot storage. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: linux-mm@kvack.org Cc: Mike Rapoport Cc: David Rientjes Cc: Andrew Morton Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Christoph Lameter Cc: Pekka Enberg Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.067210525@linutronix.de --- mm/page_owner.c | 79 ++++++++++++++++++------------------------------- 1 file changed, 28 insertions(+), 51 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index df277e6bc3c6a..addcbb2ae4e4f 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -58,15 +58,10 @@ static bool need_page_owner(void) static __always_inline depot_stack_handle_t create_dummy_stack(void) { unsigned long entries[4]; - struct stack_trace dummy; + unsigned int nr_entries; - dummy.nr_entries = 0; - dummy.max_entries = ARRAY_SIZE(entries); - dummy.entries = &entries[0]; - dummy.skip = 0; - - save_stack_trace(&dummy); - return depot_save_stack(&dummy, GFP_KERNEL); + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); + return stack_depot_save(entries, nr_entries, GFP_KERNEL); } static noinline void register_dummy_stack(void) @@ -120,46 +115,39 @@ void __reset_page_owner(struct page *page, unsigned int order) } } -static inline bool check_recursive_alloc(struct stack_trace *trace, - unsigned long ip) +static inline bool check_recursive_alloc(unsigned long *entries, + unsigned int nr_entries, + unsigned long ip) { - int i; - - if (!trace->nr_entries) - return false; + unsigned int i; - for (i = 0; i < trace->nr_entries; i++) { - if (trace->entries[i] == ip) + for (i = 0; i < nr_entries; i++) { + if (entries[i] == ip) return true; } - return false; } static noinline depot_stack_handle_t save_stack(gfp_t flags) { unsigned long entries[PAGE_OWNER_STACK_DEPTH]; - struct stack_trace trace = { - .nr_entries = 0, - .entries = entries, - .max_entries = PAGE_OWNER_STACK_DEPTH, - .skip = 2 - }; depot_stack_handle_t handle; + unsigned int nr_entries; - save_stack_trace(&trace); + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2); /* - * We need to check recursion here because our request to stackdepot - * could trigger memory allocation to save new entry. New memory - * allocation would reach here and call depot_save_stack() again - * if we don't catch it. There is still not enough memory in stackdepot - * so it would try to allocate memory again and loop forever. + * We need to check recursion here because our request to + * stackdepot could trigger memory allocation to save new + * entry. New memory allocation would reach here and call + * stack_depot_save_entries() again if we don't catch it. 
There is + * still not enough memory in stackdepot so it would try to + * allocate memory again and loop forever. */ - if (check_recursive_alloc(&trace, _RET_IP_)) + if (check_recursive_alloc(entries, nr_entries, _RET_IP_)) return dummy_handle; - handle = depot_save_stack(&trace, flags); + handle = stack_depot_save(entries, nr_entries, flags); if (!handle) handle = failure_handle; @@ -337,16 +325,10 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, struct page *page, struct page_owner *page_owner, depot_stack_handle_t handle) { - int ret; - int pageblock_mt, page_mt; + int ret, pageblock_mt, page_mt; + unsigned long *entries; + unsigned int nr_entries; char *kbuf; - unsigned long entries[PAGE_OWNER_STACK_DEPTH]; - struct stack_trace trace = { - .nr_entries = 0, - .entries = entries, - .max_entries = PAGE_OWNER_STACK_DEPTH, - .skip = 0 - }; count = min_t(size_t, count, PAGE_SIZE); kbuf = kmalloc(count, GFP_KERNEL); @@ -375,8 +357,8 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, if (ret >= count) goto err; - depot_fetch_stack(handle, &trace); - ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0); + nr_entries = stack_depot_fetch(handle, &entries); + ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0); if (ret >= count) goto err; @@ -407,14 +389,9 @@ void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); struct page_owner *page_owner; - unsigned long entries[PAGE_OWNER_STACK_DEPTH]; - struct stack_trace trace = { - .nr_entries = 0, - .entries = entries, - .max_entries = PAGE_OWNER_STACK_DEPTH, - .skip = 0 - }; depot_stack_handle_t handle; + unsigned long *entries; + unsigned int nr_entries; gfp_t gfp_mask; int mt; @@ -438,10 +415,10 @@ void __dump_page_owner(struct page *page) return; } - depot_fetch_stack(handle, &trace); + nr_entries = stack_depot_fetch(handle, &entries); pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n", page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask); - print_stack_trace(&trace, 0); + stack_trace_print(entries, nr_entries, 0); if (page_owner->last_migrate_reason != -1) pr_alert("page has been migrated, last migrate reason: %s\n", -- GitLab From 30191250c2b3ef772cc6125e1c31ac84123f1d20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:04 +0200 Subject: [PATCH 1687/1861] fault-inject: Simplify stacktrace retrieval Replace the indirection through struct stack_trace with an invocation of the storage array based interface. 
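Because the count comes back as the return value, scanning the saved entries against an address window is a plain indexed loop; a minimal sketch with hypothetical bounds and a placeholder depth of 32:

#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/types.h>

static bool demo_stack_hits_range(unsigned long start, unsigned long end)
{
        unsigned long entries[32];
        unsigned int nr_entries, i;

        /* Skip one frame so this helper does not show up itself. */
        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);

        for (i = 0; i < nr_entries; i++) {
                if (start <= entries[i] && entries[i] < end)
                        return true;
        }
        return false;
}
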
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Akinobu Mita Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.158306076@linutronix.de --- lib/fault-inject.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index cf7b129b0b2b0..e26aa4f65eb96 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -65,22 +65,16 @@ static bool fail_task(struct fault_attr *attr, struct task_struct *task) static bool fail_stacktrace(struct fault_attr *attr) { - struct stack_trace trace; int depth = attr->stacktrace_depth; unsigned long entries[MAX_STACK_TRACE_DEPTH]; - int n; + int n, nr_entries; bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); if (depth == 0) return found; - trace.nr_entries = 0; - trace.entries = entries; - trace.max_entries = depth; - trace.skip = 1; - - save_stack_trace(&trace); - for (n = 0; n < trace.nr_entries; n++) { + nr_entries = stack_trace_save(entries, depth, 1); + for (n = 0; n < nr_entries; n++) { if (attr->reject_start <= entries[n] && entries[n] < attr->reject_end) return false; -- GitLab From 746017ed8d4d3c2070bb03aee9536f24da43c778 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:05 +0200 Subject: [PATCH 1688/1861] dma/debug: Simplify stacktrace retrieval Replace the indirection through struct stack_trace with an invocation of the storage array based interface.
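Embedding a trace in a long-lived object now takes a plain length field next to the entries array instead of a struct stack_trace; a minimal sketch of that shape, with an illustrative struct and a depth of 10:

#include <linux/kernel.h>
#include <linux/stacktrace.h>

struct demo_entry {
        unsigned int stack_len;
        unsigned long stack_entries[10];
};

static void demo_entry_record(struct demo_entry *e)
{
        e->stack_len = stack_trace_save(e->stack_entries,
                                        ARRAY_SIZE(e->stack_entries), 1);
}

static void demo_entry_dump(struct demo_entry *e)
{
        stack_trace_print(e->stack_entries, e->stack_len, 0);
}
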
Signed-off-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.248658135@linutronix.de --- kernel/dma/debug.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index a218e43cc3825..badd77670d005 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -89,8 +89,8 @@ struct dma_debug_entry { int sg_mapped_ents; enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE - struct stack_trace stacktrace; - unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; + unsigned int stack_len; + unsigned long stack_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; #endif }; @@ -174,7 +174,7 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry) #ifdef CONFIG_STACKTRACE if (entry) { pr_warning("Mapped at:\n"); - print_stack_trace(&entry->stacktrace, 0); + stack_trace_print(entry->stack_entries, entry->stack_len, 0); } #endif } @@ -704,12 +704,10 @@ static struct dma_debug_entry *dma_entry_alloc(void) spin_unlock_irqrestore(&free_entries_lock, flags); #ifdef CONFIG_STACKTRACE - entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; - entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 1; - save_stack_trace(&entry->stacktrace); + entry->stack_len = stack_trace_save(entry->stack_entries, + ARRAY_SIZE(entry->stack_entries), + 1); #endif - return entry; } -- GitLab From 6924f5feba21ea2bba64b4dc316ee046c48355ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:06 +0200 Subject: [PATCH 1689/1861] btrfs: ref-verify: Simplify stack trace retrieval Replace the indirection through struct stack_trace with an invocation of the storage array based interface. 
Signed-off-by: Thomas Gleixner Reviewed-by: Johannes Thumshirn Reviewed-by: Josh Poimboeuf Acked-by: David Sterba Cc: Andy Lutomirski Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.338890064@linutronix.de --- fs/btrfs/ref-verify.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index d09b6cdb785a0..b283d3a6e837d 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -205,28 +205,17 @@ static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid) #ifdef CONFIG_STACKTRACE static void __save_stack_trace(struct ref_action *ra) { - struct stack_trace stack_trace; - - stack_trace.max_entries = MAX_TRACE; - stack_trace.nr_entries = 0; - stack_trace.entries = ra->trace; - stack_trace.skip = 2; - save_stack_trace(&stack_trace); - ra->trace_len = stack_trace.nr_entries; + ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2); } static void __print_stack_trace(struct btrfs_fs_info *fs_info, struct ref_action *ra) { - struct stack_trace trace; - if (ra->trace_len == 0) { btrfs_err(fs_info, " ref-verify: no stacktrace"); return; } - trace.nr_entries = ra->trace_len; - trace.entries = ra->trace; - print_stack_trace(&trace, 2); + stack_trace_print(ra->trace, ra->trace_len, 2); } #else static void inline __save_stack_trace(struct ref_action *ra) -- GitLab From 741b58f3e23673a666c700361711a91022a69e56 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:07 +0200 Subject: [PATCH 1690/1861] dm bufio: Simplify stack trace retrieval Replace the indirection through struct stack_trace with an invocation of the storage array based interface. 
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.446326191@linutronix.de --- drivers/md/dm-bufio.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 1ecef76225a18..2a48ea3f1b30d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -150,7 +150,7 @@ struct dm_buffer { void (*end_io)(struct dm_buffer *, blk_status_t); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING #define MAX_STACK 10 - struct stack_trace stack_trace; + unsigned int stack_len; unsigned long stack_entries[MAX_STACK]; #endif }; @@ -232,11 +232,7 @@ static DEFINE_MUTEX(dm_bufio_clients_lock); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING static void buffer_record_stack(struct dm_buffer *b) { - b->stack_trace.nr_entries = 0; - b->stack_trace.max_entries = MAX_STACK; - b->stack_trace.entries = b->stack_entries; - b->stack_trace.skip = 2; - save_stack_trace(&b->stack_trace); + b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2); } #endif @@ -438,7 +434,7 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) adjust_total_allocated(b->data_mode, (long)c->block_size); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - memset(&b->stack_trace, 0, sizeof(b->stack_trace)); + b->stack_len = 0; #endif return b; } @@ -1520,8 +1516,9 @@ static void drop_buffers(struct dm_bufio_client *c) DMERR("leaked buffer %llx, hold count %u, list %d", (unsigned long long)b->block, b->hold_count, i); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - print_stack_trace(&b->stack_trace, 1); - b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */ + stack_trace_print(b->stack_entries, b->stack_len, 1); + /* mark unclaimed to avoid BUG_ON below */ + b->hold_count = 0; #endif } -- GitLab From be9c52ed84eb0949fed3d4140e35ea70eecb02a2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:08 +0200 Subject: [PATCH 1691/1861] dm persistent data: Simplify stack trace handling Replace the indirection through struct stack_trace with an invocation of the storage array based interface. This results in less storage space and indirection. 
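Keeping the count and the entries together per holder is what removes the old parallel arrays (one of struct stack_trace, one of raw entry storage); a minimal sketch of the per-slot layout, with illustrative sizes:

#include <linux/kernel.h>
#include <linux/stacktrace.h>

struct demo_stack_store {
        unsigned int nr_entries;
        unsigned long entries[10];
};

static struct demo_stack_store demo_traces[4];

static void demo_record_holder(unsigned int slot)
{
        struct demo_stack_store *t = &demo_traces[slot];

        t->nr_entries = stack_trace_save(t->entries,
                                         ARRAY_SIZE(t->entries), 2);
}
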
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.533968922@linutronix.de --- drivers/md/persistent-data/dm-block-manager.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 3972232b80378..749ec268d957d 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -35,7 +35,10 @@ #define MAX_HOLDERS 4 #define MAX_STACK 10 -typedef unsigned long stack_entries[MAX_STACK]; +struct stack_store { + unsigned int nr_entries; + unsigned long entries[MAX_STACK]; +}; struct block_lock { spinlock_t lock; @@ -44,8 +47,7 @@ struct block_lock { struct task_struct *holders[MAX_HOLDERS]; #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - struct stack_trace traces[MAX_HOLDERS]; - stack_entries entries[MAX_HOLDERS]; + struct stack_store traces[MAX_HOLDERS]; #endif }; @@ -73,7 +75,7 @@ static void __add_holder(struct block_lock *lock, struct task_struct *task) { unsigned h = __find_holder(lock, NULL); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - struct stack_trace *t; + struct stack_store *t; #endif get_task_struct(task); @@ -81,11 +83,7 @@ static void __add_holder(struct block_lock *lock, struct task_struct *task) #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING t = lock->traces + h; - t->nr_entries = 0; - t->max_entries = MAX_STACK; - t->entries = lock->entries[h]; - t->skip = 2; - save_stack_trace(t); + t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2); #endif } @@ -106,7 +104,8 @@ static int __check_holder(struct block_lock *lock) DMERR("recursive lock detected in metadata"); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING DMERR("previously held here:"); - print_stack_trace(lock->traces + i, 4); + stack_trace_print(lock->traces[i].entries, + lock->traces[i].nr_entries, 4); DMERR("subsequent acquisition attempted here:"); dump_stack(); -- GitLab From 487f3c7fb1a07ceff78bb18688eb8538a4775227 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:09 +0200 Subject: [PATCH 1692/1861] drm: Simplify stacktrace handling Replace the indirection through struct stack_trace by using the storage array based interfaces. The original code in all printing functions is really wrong. It allocates a storage array on stack which is unused because depot_fetch_stack() does not store anything in it. It overwrites the entries pointer in the stack_trace struct so it points to the depot storage. 
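The corrected pattern needs no scratch array at all: stack_depot_fetch() hands back the depot's own entry pointer, which goes straight into stack_trace_snprint(). A minimal sketch with a placeholder name and an arbitrary 512-byte buffer:

#include <linux/printk.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static void demo_show_depot(depot_stack_handle_t handle)
{
        unsigned long *entries;
        unsigned int nr_entries;
        char buf[512];

        nr_entries = stack_depot_fetch(handle, &entries);
        stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
        pr_info("inserted at:\n%s", buf);
}
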
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Daniel Vetter Cc: Andy Lutomirski Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.622094226@linutronix.de --- drivers/gpu/drm/drm_mm.c | 22 +++++++--------------- drivers/gpu/drm/i915/i915_vma.c | 11 ++++------- drivers/gpu/drm/i915/intel_runtime_pm.c | 21 +++++++-------------- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 69552777e13a4..8b4cd31ce7bdf 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -106,22 +106,19 @@ static noinline void save_stack(struct drm_mm_node *node) { unsigned long entries[STACKDEPTH]; - struct stack_trace trace = { - .entries = entries, - .max_entries = STACKDEPTH, - .skip = 1 - }; + unsigned int n; - save_stack_trace(&trace); + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); /* May be called under spinlock, so avoid sleeping */ - node->stack = depot_save_stack(&trace, GFP_NOWAIT); + node->stack = stack_depot_save(entries, n, GFP_NOWAIT); } static void show_leaks(struct drm_mm *mm) { struct drm_mm_node *node; - unsigned long entries[STACKDEPTH]; + unsigned long *entries; + unsigned int nr_entries; char *buf; buf = kmalloc(BUFSZ, GFP_KERNEL); @@ -129,19 +126,14 @@ static void show_leaks(struct drm_mm *mm) return; list_for_each_entry(node, drm_mm_nodes(mm), node_list) { - struct stack_trace trace = { - .entries = entries, - .max_entries = STACKDEPTH - }; - if (!node->stack) { DRM_ERROR("node [%08llx + %08llx]: unknown owner\n", node->start, node->size); continue; } - depot_fetch_stack(node->stack, &trace); - snprint_stack_trace(buf, BUFSZ, &trace, 0); + nr_entries = stack_depot_fetch(node->stack, &entries); + stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0); DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s", node->start, node->size, buf); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index b713bed20c388..41b5bcb803cb5 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -36,11 +36,8 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) { - unsigned long entries[12]; - struct stack_trace trace = { - .entries = entries, - .max_entries = ARRAY_SIZE(entries), - }; + unsigned long *entries; + unsigned int nr_entries; char buf[512]; if (!vma->node.stack) { @@ -49,8 +46,8 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) return; } - depot_fetch_stack(vma->node.stack, &trace); - snprint_stack_trace(buf, sizeof(buf), &trace, 0); + nr_entries = stack_depot_fetch(vma->node.stack, &entries); + stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0); DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at 
%s\n", vma->node.start, vma->node.size, reason, buf); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 1f8acbb332c9a..20c4434474e3a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -60,27 +60,20 @@ static noinline depot_stack_handle_t __save_depot_stack(void) { unsigned long entries[STACKDEPTH]; - struct stack_trace trace = { - .entries = entries, - .max_entries = ARRAY_SIZE(entries), - .skip = 1, - }; + unsigned int n; - save_stack_trace(&trace); - return depot_save_stack(&trace, GFP_NOWAIT | __GFP_NOWARN); + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); + return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); } static void __print_depot_stack(depot_stack_handle_t stack, char *buf, int sz, int indent) { - unsigned long entries[STACKDEPTH]; - struct stack_trace trace = { - .entries = entries, - .max_entries = ARRAY_SIZE(entries), - }; + unsigned long *entries; + unsigned int nr_entries; - depot_fetch_stack(stack, &trace); - snprint_stack_trace(buf, sz, &trace, indent); + nr_entries = stack_depot_fetch(stack, &entries); + stack_trace_snprint(buf, sz, entries, nr_entries, indent); } static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915) -- GitLab From b1abe4622d4cc32b3b37cfefbc7ac070a8f868e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:10 +0200 Subject: [PATCH 1693/1861] lockdep: Remove unused trace argument from print_circular_bug() Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.716274532@linutronix.de --- kernel/locking/lockdep.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2edf9501d9065..d7615d299d083 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1522,10 +1522,9 @@ static inline int class_equal(struct lock_list *entry, void *data) } static noinline int print_circular_bug(struct lock_list *this, - struct lock_list *target, - struct held_lock *check_src, - struct held_lock *check_tgt, - struct stack_trace *trace) + struct lock_list *target, + struct held_lock *check_src, + struct held_lock *check_tgt) { struct task_struct *curr = current; struct lock_list *parent; @@ -2206,7 +2205,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ save(trace); } - return print_circular_bug(&this, target_entry, next, prev, trace); + return print_circular_bug(&this, target_entry, next, prev); } else if (unlikely(ret < 0)) return print_bfs_bug(ret); -- GitLab From 76b14436b4d98903fef723365170bedd6f28ab2c Mon Sep 17 00:00:00 2001 From: Thomas 
Gleixner Date: Thu, 25 Apr 2019 11:45:11 +0200 Subject: [PATCH 1694/1861] lockdep: Remove save argument from check_prev_add() There is only one caller which hands in save_trace as function pointer. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.803362058@linutronix.de --- kernel/locking/lockdep.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d7615d299d083..3603893d5bbde 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2158,8 +2158,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, struct stack_trace *trace, - int (*save)(struct stack_trace *trace)) + struct held_lock *next, int distance, struct stack_trace *trace) { struct lock_list *uninitialized_var(target_entry); struct lock_list *entry; @@ -2199,11 +2198,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (unlikely(!ret)) { if (!trace->entries) { /* - * If @save fails here, the printing might trigger - * a WARN but because of the !nr_entries it should - * not do bad things. + * If save_trace fails here, the printing might + * trigger a WARN but because of the !nr_entries it + * should not do bad things. */ - save(trace); + save_trace(trace); } return print_circular_bug(&this, target_entry, next, prev); } @@ -2253,7 +2252,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return print_bfs_bug(ret); - if (!trace->entries && !save(trace)) + if (!trace->entries && !save_trace(trace)) return 0; /* @@ -2318,7 +2317,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) * added: */ if (hlock->read != 2 && hlock->check) { - int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace); + int ret = check_prev_add(curr, hlock, next, distance, + &trace); if (!ret) return 0; -- GitLab From c120bce78065cbea460a58b1572c215db9c148ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:12 +0200 Subject: [PATCH 1695/1861] lockdep: Simplify stack trace handling Replace the indirection through struct stack_trace by using the storage array based interfaces and storing the information in a small lockdep specific data structure.
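The lockdep-local structure stores only a count and an offset into the shared stack_trace[] pool, so each recorded trace costs two unsigned ints of metadata. A minimal sketch of the save/print pair under that scheme, with an illustrative pool size:

#include <linux/kernel.h>
#include <linux/stacktrace.h>

struct demo_lock_trace {
        unsigned int nr_entries;
        unsigned int offset;
};

static unsigned long demo_pool[1024];
static unsigned int demo_pool_used;

static int demo_save(struct demo_lock_trace *t)
{
        unsigned long *entries = demo_pool + demo_pool_used;
        unsigned int max_entries = ARRAY_SIZE(demo_pool) - demo_pool_used;

        t->offset = demo_pool_used;
        t->nr_entries = stack_trace_save(entries, max_entries, 3);
        demo_pool_used += t->nr_entries;

        /* Nonzero while the shared pool still has room. */
        return demo_pool_used < ARRAY_SIZE(demo_pool);
}

static void demo_print(struct demo_lock_trace *t)
{
        stack_trace_print(demo_pool + t->offset, t->nr_entries, 2);
}
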
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.891724020@linutronix.de --- include/linux/lockdep.h | 9 +++++-- kernel/locking/lockdep.c | 55 ++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 79c3873d58acc..6f165d6253200 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -66,6 +66,11 @@ struct lock_class_key { extern struct lock_class_key __lockdep_no_validate__; +struct lock_trace { + unsigned int nr_entries; + unsigned int offset; +}; + #define LOCKSTAT_POINTS 4 /* @@ -100,7 +105,7 @@ struct lock_class { * IRQ/softirq usage tracking bits: */ unsigned long usage_mask; - struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES]; + struct lock_trace usage_traces[XXX_LOCK_USAGE_STATES]; /* * Generation counter, when doing certain classes of graph walking, @@ -188,7 +193,7 @@ struct lock_list { struct list_head entry; struct lock_class *class; struct lock_class *links_to; - struct stack_trace trace; + struct lock_trace trace; int distance; /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 3603893d5bbde..45bcaf2e4cb6b 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -434,18 +434,14 @@ static void print_lockdep_off(const char *bug_msg) #endif } -static int save_trace(struct stack_trace *trace) +static int save_trace(struct lock_trace *trace) { - trace->nr_entries = 0; - trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; - trace->entries = stack_trace + nr_stack_trace_entries; - - trace->skip = 3; - - save_stack_trace(trace); - - trace->max_entries = trace->nr_entries; + unsigned long *entries = stack_trace + nr_stack_trace_entries; + unsigned int max_entries; + trace->offset = nr_stack_trace_entries; + max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + trace->nr_entries = stack_trace_save(entries, max_entries, 3); nr_stack_trace_entries += trace->nr_entries; if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { @@ -1196,7 +1192,7 @@ static struct lock_list *alloc_list_entry(void) static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, unsigned long ip, int distance, - struct stack_trace *trace) + struct lock_trace *trace) { struct lock_list *entry; /* @@ -1415,6 +1411,13 @@ static inline int __bfs_backwards(struct lock_list *src_entry, * checking. 
*/ +static void print_lock_trace(struct lock_trace *trace, unsigned int spaces) +{ + unsigned long *entries = stack_trace + trace->offset; + + stack_trace_print(entries, trace->nr_entries, spaces); +} + /* * Print a dependency chain entry (this is only done when a deadlock * has been detected): @@ -1427,8 +1430,7 @@ print_circular_bug_entry(struct lock_list *target, int depth) printk("\n-> #%u", depth); print_lock_name(target->class); printk(KERN_CONT ":\n"); - print_stack_trace(&target->trace, 6); - + print_lock_trace(&target->trace, 6); return 0; } @@ -1740,7 +1742,7 @@ static void print_lock_class_header(struct lock_class *class, int depth) len += printk("%*s %s", depth, "", usage_str[bit]); len += printk(KERN_CONT " at:\n"); - print_stack_trace(class->usage_traces + bit, len); + print_lock_trace(class->usage_traces + bit, len); } } printk("%*s }\n", depth, ""); @@ -1765,7 +1767,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf, do { print_lock_class_header(entry->class, depth); printk("%*s ... acquired at:\n", depth, ""); - print_stack_trace(&entry->trace, 2); + print_lock_trace(&entry->trace, 2); printk("\n"); if (depth == 0 && (entry != root)) { @@ -1878,14 +1880,14 @@ print_bad_irq_dependency(struct task_struct *curr, print_lock_name(backwards_entry->class); pr_warn("\n... which became %s-irq-safe at:\n", irqclass); - print_stack_trace(backwards_entry->class->usage_traces + bit1, 1); + print_lock_trace(backwards_entry->class->usage_traces + bit1, 1); pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass); print_lock_name(forwards_entry->class); pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass); pr_warn("..."); - print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); + print_lock_trace(forwards_entry->class->usage_traces + bit2, 1); pr_warn("\nother info that might help us debug this:\n\n"); print_irq_lock_scenario(backwards_entry, forwards_entry, @@ -2158,7 +2160,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, struct stack_trace *trace) + struct held_lock *next, int distance, struct lock_trace *trace) { struct lock_list *uninitialized_var(target_entry); struct lock_list *entry; @@ -2196,7 +2198,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, this.parent = NULL; ret = check_noncircular(&this, hlock_class(prev), &target_entry); if (unlikely(!ret)) { - if (!trace->entries) { + if (!trace->nr_entries) { /* * If save_trace fails here, the printing might * trigger a WARN but because of the !nr_entries it @@ -2252,7 +2254,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return print_bfs_bug(ret); - if (!trace->entries && !save_trace(trace)) + if (!trace->nr_entries && !save_trace(trace)) return 0; /* @@ -2284,14 +2286,9 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, static int check_prevs_add(struct task_struct *curr, struct held_lock *next) { + struct lock_trace trace = { .nr_entries = 0 }; int depth = curr->lockdep_depth; struct held_lock *hlock; - struct stack_trace trace = { - .nr_entries = 0, - .max_entries = 0, - .entries = NULL, - .skip = 0, - }; /* * Debugging checks. 
@@ -2719,6 +2716,10 @@ static inline int validate_chain(struct task_struct *curr, { return 1; } + +static void print_lock_trace(struct lock_trace *trace, unsigned int spaces) +{ +} #endif /* @@ -2815,7 +2816,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, print_lock(this); pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]); - print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1); + print_lock_trace(hlock_class(this)->usage_traces + prev_bit, 1); print_irqtrace_events(curr); pr_warn("\nother info that might help us debug this:\n"); -- GitLab From e7d916632b528e8cccc8e9ccca81acfc591a5fde Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:13 +0200 Subject: [PATCH 1696/1861] tracing: Simplify stacktrace retrieval in histograms The indirection through struct stack_trace is not necessary at all. Use the storage array based interface. Signed-off-by: Thomas Gleixner Tested-by: Tom Zanussi Reviewed-by: Tom Zanussi Reviewed-by: Josh Poimboeuf Acked-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094802.979089273@linutronix.de --- kernel/trace/trace_events_hist.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 21ceae299f7eb..a1d20421f4b03 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5186,7 +5186,6 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec, u64 var_ref_vals[TRACING_MAP_VARS_MAX]; char compound_key[HIST_KEY_SIZE_MAX]; struct tracing_map_elt *elt = NULL; - struct stack_trace stacktrace; struct hist_field *key_field; u64 field_contents; void *key = NULL; @@ -5198,14 +5197,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec, key_field = hist_data->fields[i]; if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { - stacktrace.max_entries = HIST_STACKTRACE_DEPTH; - stacktrace.entries = entries; - stacktrace.nr_entries = 0; - stacktrace.skip = HIST_STACKTRACE_SKIP; - - memset(stacktrace.entries, 0, HIST_STACKTRACE_SIZE); - save_stack_trace(&stacktrace); - + memset(entries, 0, HIST_STACKTRACE_SIZE); + stack_trace_save(entries, HIST_STACKTRACE_DEPTH, + HIST_STACKTRACE_SKIP); key = entries; } else { field_contents = key_field->fn(key_field, elt, rbe, rec); -- GitLab From 2a820bf74918d61ea54f7c1001f4a6a2e457577c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:14 +0200 Subject: [PATCH 1697/1861] tracing: Use percpu stack trace buffer more intelligently The per cpu stack trace buffer usage pattern is odd at best. The buffer has place for 512 stack trace entries on 64-bit and 1024 on 32-bit. 
When interrupts or exceptions nest after the per cpu buffer was acquired, the stacktrace length is hardcoded to 8 entries. 512/1024 stack trace entries in kernel stacks are unrealistic so the buffer is a complete waste. Split the buffer into 4 nest levels, which are 128/256 entries per level. This allows nesting contexts (interrupts, exceptions) to utilize the cpu buffer for stack retrieval and avoids the fixed length allocation along with the conditional execution paths. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.066064076@linutronix.de --- kernel/trace/trace.c | 73 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21153e64bf1c3..4fc93004feabc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2749,12 +2749,21 @@ trace_function(struct trace_array *tr, #ifdef CONFIG_STACKTRACE -#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) +/* Allow 4 levels of nesting: normal, softirq, irq, NMI */ +#define FTRACE_KSTACK_NESTING 4 + +#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING) + struct ftrace_stack { - unsigned long calls[FTRACE_STACK_MAX_ENTRIES]; + unsigned long calls[FTRACE_KSTACK_ENTRIES]; +}; + + +struct ftrace_stacks { + struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; }; -static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); +static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); static void __ftrace_trace_stack(struct ring_buffer *buffer, @@ -2763,10 +2772,11 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, { struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; + struct ftrace_stack *fstack; struct stack_entry *entry; struct stack_trace trace; - int use_stack; - int size = FTRACE_STACK_ENTRIES; + int size = FTRACE_KSTACK_ENTRIES; + int stackidx; trace.nr_entries = 0; trace.skip = skip; @@ -2788,29 +2798,32 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, */ preempt_disable_notrace(); - use_stack = __this_cpu_inc_return(ftrace_stack_reserve); + stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; + + /* This should never happen. If it does, yell once and skip */ + if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING)) + goto out; + /* - * We don't need any atomic variables, just a barrier. - * If an interrupt comes in, we don't care, because it would - * have exited and put the counter back to what we want. - * We just need a barrier to keep gcc from moving things - * around. + * The above __this_cpu_inc_return() is 'atomic' cpu local.
An + * interrupt will either see the value pre increment or post + * increment. If the interrupt happens pre increment it will have + * restored the counter when it returns. We just need a barrier to + * keep gcc from moving things around. */ barrier(); - if (use_stack == 1) { - trace.entries = this_cpu_ptr(ftrace_stack.calls); - trace.max_entries = FTRACE_STACK_MAX_ENTRIES; - if (regs) - save_stack_trace_regs(regs, &trace); - else - save_stack_trace(&trace); + fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; + trace.entries = fstack->calls; + trace.max_entries = FTRACE_KSTACK_ENTRIES; - if (trace.nr_entries > size) - size = trace.nr_entries; - } else - /* From now on, use_stack is a boolean */ - use_stack = 0; + if (regs) + save_stack_trace_regs(regs, &trace); + else + save_stack_trace(&trace); + + if (trace.nr_entries > size) + size = trace.nr_entries; size *= sizeof(unsigned long); @@ -2820,19 +2833,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, goto out; entry = ring_buffer_event_data(event); - memset(&entry->caller, 0, size); - - if (use_stack) - memcpy(&entry->caller, trace.entries, - trace.nr_entries * sizeof(unsigned long)); - else { - trace.max_entries = FTRACE_STACK_ENTRIES; - trace.entries = entry->caller; - if (regs) - save_stack_trace_regs(regs, &trace); - else - save_stack_trace(&trace); - } + memcpy(&entry->caller, trace.entries, size); entry->size = trace.nr_entries; -- GitLab From c438f140cc16d47fac808d893f5017f6d641cb46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:15 +0200 Subject: [PATCH 1698/1861] tracing: Make ftrace_trace_userstack() static and conditional It's only used in trace.c and there is absolutely no point in compiling it in when user space stack traces are not supported. 
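The shape is the usual config-gated pair: one real definition when user stack traces are supported and an empty static stub otherwise, so the call sites inside trace.c stay free of #ifdefs. A sketch with the body elided:

#ifdef CONFIG_USER_STACKTRACE_SUPPORT
static void ftrace_trace_userstack(struct ring_buffer *buffer,
                                   unsigned long flags, int pc)
{
        /* capture and record the user-space backtrace */
}
#else
static void ftrace_trace_userstack(struct ring_buffer *buffer,
                                   unsigned long flags, int pc)
{
}
#endif
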
Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.162400595@linutronix.de --- kernel/trace/trace.c | 14 ++++++++------ kernel/trace/trace.h | 8 -------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4fc93004feabc..d8369d27c1afe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -159,6 +159,8 @@ static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ static int tracing_set_tracer(struct trace_array *tr, const char *buf); +static void ftrace_trace_userstack(struct ring_buffer *buffer, + unsigned long flags, int pc); #define MAX_TRACER_SIZE 100 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; @@ -2905,9 +2907,10 @@ void trace_dump_stack(int skip) } EXPORT_SYMBOL_GPL(trace_dump_stack); +#ifdef CONFIG_USER_STACKTRACE_SUPPORT static DEFINE_PER_CPU(int, user_stack_count); -void +static void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { struct trace_event_call *call = &event_user_stack; @@ -2958,13 +2961,12 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) out: preempt_enable(); } - -#ifdef UNUSED -static void __trace_userstack(struct trace_array *tr, unsigned long flags) +#else /* CONFIG_USER_STACKTRACE_SUPPORT */ +static void ftrace_trace_userstack(struct ring_buffer *buffer, + unsigned long flags, int pc) { - ftrace_trace_userstack(tr, flags, preempt_count()); } -#endif /* UNUSED */ +#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ #endif /* CONFIG_STACKTRACE */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d80cee49e0eb4..639047b259d79 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -782,17 +782,9 @@ void update_max_tr_single(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef CONFIG_STACKTRACE -void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, - int pc); - void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc); #else -static inline void ftrace_trace_userstack(struct ring_buffer *buffer, - unsigned long flags, int pc) -{ -} - static inline void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { -- GitLab From ee6dd0db4d8de41a0a0bc37d8d87a0b1623f83b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:16 +0200 Subject: [PATCH 1699/1861] tracing: Simplify stack trace retrieval Replace the indirection through struct stack_trace by using the storage array based interfaces. 
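With both save variants returning a count into the same array, the regs/no-regs distinction reduces to picking the right call; a minimal sketch of that helper shape, with a placeholder name:

#include <linux/ptrace.h>
#include <linux/stacktrace.h>

static unsigned int demo_save_kernel_stack(struct pt_regs *regs,
                                           unsigned long *calls,
                                           unsigned int size, int skip)
{
        if (regs)
                return stack_trace_save_regs(regs, calls, size, skip);

        return stack_trace_save(calls, size, skip);
}
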
Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.248604594@linutronix.de --- kernel/trace/trace.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d8369d27c1afe..0ce8515dd4704 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2774,22 +2774,18 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, { struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; + unsigned int size, nr_entries; struct ftrace_stack *fstack; struct stack_entry *entry; - struct stack_trace trace; - int size = FTRACE_KSTACK_ENTRIES; int stackidx; - trace.nr_entries = 0; - trace.skip = skip; - /* * Add one, for this function and the call to save_stack_trace() * If regs is set, then these functions will not be in the way. */ #ifndef CONFIG_UNWINDER_ORC if (!regs) - trace.skip++; + skip++; #endif /* @@ -2816,28 +2812,24 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, barrier(); fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; - trace.entries = fstack->calls; - trace.max_entries = FTRACE_KSTACK_ENTRIES; - - if (regs) - save_stack_trace_regs(regs, &trace); - else - save_stack_trace(&trace); - - if (trace.nr_entries > size) - size = trace.nr_entries; + size = ARRAY_SIZE(fstack->calls); - size *= sizeof(unsigned long); + if (regs) { + nr_entries = stack_trace_save_regs(regs, fstack->calls, + size, skip); + } else { + nr_entries = stack_trace_save(fstack->calls, size, skip); + } + size = nr_entries * sizeof(unsigned long); event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, flags, pc); if (!event) goto out; entry = ring_buffer_event_data(event); - memcpy(&entry->caller, trace.entries, size); - - entry->size = trace.nr_entries; + memcpy(&entry->caller, fstack->calls, size); + entry->size = nr_entries; if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); @@ -2916,7 +2908,6 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; - struct stack_trace trace; if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE)) return; @@ -2947,12 +2938,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) entry->tgid = current->tgid; memset(&entry->caller, 0, sizeof(entry->caller)); - trace.nr_entries = 0; - trace.max_entries = FTRACE_STACK_ENTRIES; - trace.skip = 0; - trace.entries = entry->caller; - - save_stack_trace_user(&trace); + 
stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); -- GitLab From 9f50c91b1195dfffd183d5d8505e45af86623532 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:17 +0200 Subject: [PATCH 1700/1861] tracing: Remove the last struct stack_trace usage Simplify the stack retrieval code by using the storage array based interface. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.340000461@linutronix.de --- kernel/trace/trace_stack.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 4efda5f75a0f3..5d16f73898dbd 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -23,11 +23,7 @@ static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES]; static unsigned stack_trace_index[STACK_TRACE_ENTRIES]; -struct stack_trace stack_trace_max = { - .max_entries = STACK_TRACE_ENTRIES, - .entries = &stack_dump_trace[0], -}; - +static unsigned int stack_trace_nr_entries; static unsigned long stack_trace_max_size; static arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; @@ -44,10 +40,10 @@ static void print_max_stack(void) pr_emerg(" Depth Size Location (%d entries)\n" " ----- ---- --------\n", - stack_trace_max.nr_entries); + stack_trace_nr_entries); - for (i = 0; i < stack_trace_max.nr_entries; i++) { - if (i + 1 == stack_trace_max.nr_entries) + for (i = 0; i < stack_trace_nr_entries; i++) { + if (i + 1 == stack_trace_nr_entries) size = stack_trace_index[i]; else size = stack_trace_index[i] - stack_trace_index[i+1]; @@ -93,13 +89,12 @@ static void check_stack(unsigned long ip, unsigned long *stack) stack_trace_max_size = this_size; - stack_trace_max.nr_entries = 0; - stack_trace_max.skip = 0; - - save_stack_trace(&stack_trace_max); + stack_trace_nr_entries = stack_trace_save(stack_dump_trace, + ARRAY_SIZE(stack_dump_trace) - 1, + 0); /* Skip over the overhead of the stack tracer itself */ - for (i = 0; i < stack_trace_max.nr_entries; i++) { + for (i = 0; i < stack_trace_nr_entries; i++) { if (stack_dump_trace[i] == ip) break; } @@ -108,7 +103,7 @@ static void check_stack(unsigned long ip, unsigned long *stack) * Some archs may not have the passed in ip in the dump. * If that happens, we need to show everything. */ - if (i == stack_trace_max.nr_entries) + if (i == stack_trace_nr_entries) i = 0; /* @@ -126,13 +121,13 @@ static void check_stack(unsigned long ip, unsigned long *stack) * loop will only happen once. 
This code only takes place * on a new max, so it is far from a fast path. */ - while (i < stack_trace_max.nr_entries) { + while (i < stack_trace_nr_entries) { int found = 0; stack_trace_index[x] = this_size; p = start; - for (; p < top && i < stack_trace_max.nr_entries; p++) { + for (; p < top && i < stack_trace_nr_entries; p++) { /* * The READ_ONCE_NOCHECK is used to let KASAN know that * this is not a stack-out-of-bounds error. @@ -163,7 +158,7 @@ static void check_stack(unsigned long ip, unsigned long *stack) i++; } - stack_trace_max.nr_entries = x; + stack_trace_nr_entries = x; if (task_stack_end_corrupted(current)) { print_max_stack(); @@ -265,7 +260,7 @@ __next(struct seq_file *m, loff_t *pos) { long n = *pos - 1; - if (n >= stack_trace_max.nr_entries) + if (n >= stack_trace_nr_entries) return NULL; m->private = (void *)n; @@ -329,7 +324,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", - stack_trace_max.nr_entries); + stack_trace_nr_entries); if (!stack_tracer_enabled && !stack_trace_max_size) print_disabled(m); @@ -339,10 +334,10 @@ static int t_show(struct seq_file *m, void *v) i = *(long *)v; - if (i >= stack_trace_max.nr_entries) + if (i >= stack_trace_nr_entries) return 0; - if (i + 1 == stack_trace_max.nr_entries) + if (i + 1 == stack_trace_nr_entries) size = stack_trace_index[i]; else size = stack_trace_index[i] - stack_trace_index[i+1]; -- GitLab From 25e39e32b0a3f99b9db320605f20f91d425b6a65 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:18 +0200 Subject: [PATCH 1701/1861] livepatch: Simplify stack trace retrieval Replace the indirection through struct stack_trace by using the storage array based interfaces. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Miroslav Benes Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.437950229@linutronix.de --- kernel/livepatch/transition.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 9c89ae8b337a2..c53370d596be6 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -202,15 +202,15 @@ void klp_update_patch_state(struct task_struct *task) * Determine whether the given stack trace includes any references to a * to-be-patched or to-be-unpatched function. 
*/ -static int klp_check_stack_func(struct klp_func *func, - struct stack_trace *trace) +static int klp_check_stack_func(struct klp_func *func, unsigned long *entries, + unsigned int nr_entries) { unsigned long func_addr, func_size, address; struct klp_ops *ops; int i; - for (i = 0; i < trace->nr_entries; i++) { - address = trace->entries[i]; + for (i = 0; i < nr_entries; i++) { + address = entries[i]; if (klp_target_state == KLP_UNPATCHED) { /* @@ -254,29 +254,25 @@ static int klp_check_stack_func(struct klp_func *func, static int klp_check_stack(struct task_struct *task, char *err_buf) { static unsigned long entries[MAX_STACK_ENTRIES]; - struct stack_trace trace; struct klp_object *obj; struct klp_func *func; - int ret; + int ret, nr_entries; - trace.skip = 0; - trace.nr_entries = 0; - trace.max_entries = MAX_STACK_ENTRIES; - trace.entries = entries; - ret = save_stack_trace_tsk_reliable(task, &trace); + ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries)); WARN_ON_ONCE(ret == -ENOSYS); - if (ret) { + if (ret < 0) { snprintf(err_buf, STACK_ERR_BUF_SIZE, "%s: %s:%d has an unreliable stack\n", __func__, task->comm, task->pid); return ret; } + nr_entries = ret; klp_for_each_object(klp_transition_patch, obj) { if (!obj->patched) continue; klp_for_each_func(obj, func) { - ret = klp_check_stack_func(func, &trace); + ret = klp_check_stack_func(func, entries, nr_entries); if (ret) { snprintf(err_buf, STACK_ERR_BUF_SIZE, "%s: %s:%d is sleeping on function %s\n", -- GitLab From 988ec8841ca1e22b2978fce0134d8267e838770e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:19 +0200 Subject: [PATCH 1702/1861] stacktrace: Remove obsolete functions No more users of the struct stack_trace based interfaces. Remove them. Remove the macro stubs for !CONFIG_STACKTRACE as well as they are pointless because the storage on the call sites is conditional on CONFIG_STACKTRACE already. No point to be 'smart'. 
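A sketch of why the stubs buy nothing (call-site shape assumed for illustration): the storage at the call sites is already conditional, so the calls vanish together with it when CONFIG_STACKTRACE is off:

    #ifdef CONFIG_STACKTRACE
        unsigned long entries[8];           /* storage only exists here */
        unsigned int nr;

        nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
    #endif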
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.524796783@linutronix.de --- include/linux/stacktrace.h | 17 ----------------- kernel/stacktrace.c | 14 -------------- 2 files changed, 31 deletions(-) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index a24340b3e9e14..40decfbb9a241 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -36,24 +36,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern int save_stack_trace_tsk_reliable(struct task_struct *tsk, struct stack_trace *trace); - -extern void print_stack_trace(struct stack_trace *trace, int spaces); -extern int snprint_stack_trace(char *buf, size_t size, - struct stack_trace *trace, int spaces); - -#ifdef CONFIG_USER_STACKTRACE_SUPPORT extern void save_stack_trace_user(struct stack_trace *trace); -#else -# define save_stack_trace_user(trace) do { } while (0) -#endif - -#else /* !CONFIG_STACKTRACE */ -# define save_stack_trace(trace) do { } while (0) -# define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) -# define print_stack_trace(trace, spaces) do { } while (0) -# define snprint_stack_trace(buf, size, trace, spaces) do { } while (0) -# define save_stack_trace_tsk_reliable(tsk, trace) ({ -ENOSYS; }) #endif /* CONFIG_STACKTRACE */ #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE) diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index b38333b3bc181..dd55312f3fe9c 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -30,12 +30,6 @@ void stack_trace_print(unsigned long *entries, unsigned int nr_entries, } EXPORT_SYMBOL_GPL(stack_trace_print); -void print_stack_trace(struct stack_trace *trace, int spaces) -{ - stack_trace_print(trace->entries, trace->nr_entries, spaces); -} -EXPORT_SYMBOL_GPL(print_stack_trace); - /** * stack_trace_snprint - Print the entries in the stack trace into a buffer * @buf: Pointer to the print buffer @@ -72,14 +66,6 @@ int stack_trace_snprint(char *buf, size_t size, unsigned long *entries, } EXPORT_SYMBOL_GPL(stack_trace_snprint); -int snprint_stack_trace(char *buf, size_t size, - struct stack_trace *trace, int spaces) -{ - return stack_trace_snprint(buf, size, trace->entries, - trace->nr_entries, spaces); -} -EXPORT_SYMBOL_GPL(snprint_stack_trace); - /* * Architectures that do not implement save_stack_trace_*() * get these weak aliases and once-per-bootup warnings -- GitLab From 56d8f079c51afc8b564b9fb0252d48e7b437c1e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:20 +0200 Subject: [PATCH 1703/1861] lib/stackdepot: Remove obsolete functions 
No more users of the struct stack_trace based interfaces. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Acked-by: Alexander Potapenko Cc: Andy Lutomirski Cc: Steven Rostedt Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.617937448@linutronix.de --- include/linux/stackdepot.h | 4 ---- lib/stackdepot.c | 20 -------------------- 2 files changed, 24 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 4297c6d2991d2..0805dee1b6b89 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -23,13 +23,9 @@ typedef u32 depot_stack_handle_t; -struct stack_trace; - -depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags); depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); -void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index e84f8e58495c1..605c61f65d94b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -216,14 +216,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, } EXPORT_SYMBOL_GPL(stack_depot_fetch); -void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) -{ - unsigned int nent = stack_depot_fetch(handle, &trace->entries); - - trace->max_entries = trace->nr_entries = nent; -} -EXPORT_SYMBOL_GPL(depot_fetch_stack); - /** * stack_depot_save - Save a stack trace from an array * @@ -318,15 +310,3 @@ fast_exit: return retval; } EXPORT_SYMBOL_GPL(stack_depot_save); - -/** - * depot_save_stack - save stack in a stack depot. - * @trace - the stacktrace to save. - * @alloc_flags - flags for allocating additional memory if required. - */ -depot_stack_handle_t depot_save_stack(struct stack_trace *trace, - gfp_t alloc_flags) -{ - return stack_depot_save(trace->entries, trace->nr_entries, alloc_flags); -} -EXPORT_SYMBOL_GPL(depot_save_stack); -- GitLab From 214d8ca6ee854f696f75e75511fe66b409e656db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:21 +0200 Subject: [PATCH 1704/1861] stacktrace: Provide common infrastructure All architectures which support stacktrace carry duplicated code and do the stack storage and filtering at the architecture side. Provide a consolidated interface with a callback function for consuming the stack entries provided by the architecture specific stack walker. This removes lots of duplicated code and allows to implement better filtering than 'skip number of entries' in the future without touching any architecture specific code. 
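The consolidated interface boils down to a per-entry consume callback invoked by the architecture's stack walker. A minimal sketch (the cookie struct below is a stand-in for the real stacktrace_cookie in kernel/stacktrace.c):

    struct my_cookie {                      /* hypothetical consumer state */
        unsigned long *store;
        unsigned int size, len;
    };

    static bool my_consume(void *cookie, unsigned long addr, bool reliable)
    {
        struct my_cookie *c = cookie;

        if (c->len >= c->size)
            return false;                   /* buffer full, stop the walk */
        c->store[c->len++] = addr;
        return true;                        /* continue the walk */
    }

    /* then: arch_stack_walk(my_consume, &c, current, NULL); */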
Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: linux-arch@vger.kernel.org Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Link: https://lkml.kernel.org/r/20190425094803.713568606@linutronix.de --- include/linux/stacktrace.h | 39 +++++++++ kernel/stacktrace.c | 173 +++++++++++++++++++++++++++++++++++++ lib/Kconfig | 4 + 3 files changed, 216 insertions(+) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 40decfbb9a241..f0cfd12cb45eb 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -23,6 +23,44 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); /* Internal interfaces. Do not use in generic code */ +#ifdef CONFIG_ARCH_STACKWALK + +/** + * stack_trace_consume_fn - Callback for arch_stack_walk() + * @cookie: Caller supplied pointer handed back by arch_stack_walk() + * @addr: The stack entry address to consume + * @reliable: True when the stack entry is reliable. Required by + * some printk based consumers. + * + * Return: True, if the entry was consumed or skipped + * False, if there is no space left to store + */ +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr, + bool reliable); +/** + * arch_stack_walk - Architecture specific function to walk the stack + * @consume_entry: Callback which is invoked by the architecture code for + * each entry. 
+ * @cookie: Caller supplied pointer which is handed back to + * @consume_entry + * @task: Pointer to a task struct, can be NULL + * @regs: Pointer to registers, can be NULL + * + * ============ ======= ============================================ + * task regs + * ============ ======= ============================================ + * task NULL Stack trace from task (can be current) + * current regs Stack trace starting on regs->stackpointer + * ============ ======= ============================================ + */ +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs); +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task); +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs); + +#else /* CONFIG_ARCH_STACKWALK */ struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; @@ -37,6 +75,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, extern int save_stack_trace_tsk_reliable(struct task_struct *tsk, struct stack_trace *trace); extern void save_stack_trace_user(struct stack_trace *trace); +#endif /* !CONFIG_ARCH_STACKWALK */ #endif /* CONFIG_STACKTRACE */ #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE) diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index dd55312f3fe9c..27bafc1e271ee 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -5,6 +5,8 @@ * * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar */ +#include +#include #include #include #include @@ -66,6 +68,175 @@ int stack_trace_snprint(char *buf, size_t size, unsigned long *entries, } EXPORT_SYMBOL_GPL(stack_trace_snprint); +#ifdef CONFIG_ARCH_STACKWALK + +struct stacktrace_cookie { + unsigned long *store; + unsigned int size; + unsigned int skip; + unsigned int len; +}; + +static bool stack_trace_consume_entry(void *cookie, unsigned long addr, + bool reliable) +{ + struct stacktrace_cookie *c = cookie; + + if (c->len >= c->size) + return false; + + if (c->skip > 0) { + c->skip--; + return true; + } + c->store[c->len++] = addr; + return c->len < c->size; +} + +static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr, + bool reliable) +{ + if (in_sched_functions(addr)) + return true; + return stack_trace_consume_entry(cookie, addr, reliable); +} + +/** + * stack_trace_save - Save a stack trace into a storage array + * @store: Pointer to storage array + * @size: Size of the storage array + * @skipnr: Number of entries to skip at the start of the stack trace + * + * Return: Number of trace entries stored. + */ +unsigned int stack_trace_save(unsigned long *store, unsigned int size, + unsigned int skipnr) +{ + stack_trace_consume_fn consume_entry = stack_trace_consume_entry; + struct stacktrace_cookie c = { + .store = store, + .size = size, + .skip = skipnr + 1, + }; + + arch_stack_walk(consume_entry, &c, current, NULL); + return c.len; +} +EXPORT_SYMBOL_GPL(stack_trace_save); + +/** + * stack_trace_save_tsk - Save a task stack trace into a storage array + * @task: The task to examine + * @store: Pointer to storage array + * @size: Size of the storage array + * @skipnr: Number of entries to skip at the start of the stack trace + * + * Return: Number of trace entries stored. 
+ */ +unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store, + unsigned int size, unsigned int skipnr) +{ + stack_trace_consume_fn consume_entry = stack_trace_consume_entry_nosched; + struct stacktrace_cookie c = { + .store = store, + .size = size, + .skip = skipnr + 1, + }; + + if (!try_get_task_stack(tsk)) + return 0; + + arch_stack_walk(consume_entry, &c, tsk, NULL); + put_task_stack(tsk); + return c.len; +} + +/** + * stack_trace_save_regs - Save a stack trace based on pt_regs into a storage array + * @regs: Pointer to pt_regs to examine + * @store: Pointer to storage array + * @size: Size of the storage array + * @skipnr: Number of entries to skip at the start of the stack trace + * + * Return: Number of trace entries stored. + */ +unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, + unsigned int size, unsigned int skipnr) +{ + stack_trace_consume_fn consume_entry = stack_trace_consume_entry; + struct stacktrace_cookie c = { + .store = store, + .size = size, + .skip = skipnr, + }; + + arch_stack_walk(consume_entry, &c, current, regs); + return c.len; +} + +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE +/** + * stack_trace_save_tsk_reliable - Save task stack with verification + * @tsk: Pointer to the task to examine + * @store: Pointer to storage array + * @size: Size of the storage array + * + * Return: An error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is + * reliable and returns the number of entries stored. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. + */ +int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store, + unsigned int size) +{ + stack_trace_consume_fn consume_entry = stack_trace_consume_entry; + struct stacktrace_cookie c = { + .store = store, + .size = size, + }; + int ret; + + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ + if (!try_get_task_stack(tsk)) + return 0; + + ret = arch_stack_walk_reliable(consume_entry, &c, tsk); + put_task_stack(tsk); + return ret; +} +#endif + +#ifdef CONFIG_USER_STACKTRACE_SUPPORT +/** + * stack_trace_save_user - Save a user space stack trace into a storage array + * @store: Pointer to storage array + * @size: Size of the storage array + * + * Return: Number of trace entries stored. + */ +unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) +{ + stack_trace_consume_fn consume_entry = stack_trace_consume_entry; + struct stacktrace_cookie c = { + .store = store, + .size = size, + }; + + /* Trace user stack if not a kernel thread */ + if (!current->mm) + return 0; + + arch_stack_walk_user(consume_entry, &c, task_pt_regs(current)); + return c.len; +} +#endif + +#else /* CONFIG_ARCH_STACKWALK */ + /* * Architectures that do not implement save_stack_trace_*() * get these weak aliases and once-per-bootup warnings @@ -203,3 +374,5 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) return trace.nr_entries; } #endif /* CONFIG_USER_STACKTRACE_SUPPORT */ + +#endif /* !CONFIG_ARCH_STACKWALK */ diff --git a/lib/Kconfig b/lib/Kconfig index a9e56539bd116..e86975bfca6ab 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -597,6 +597,10 @@ config ARCH_HAS_UACCESS_FLUSHCACHE config ARCH_HAS_UACCESS_MCSAFE bool +# Temporary. 
Goes away when all archs are cleaned up +config ARCH_STACKWALK + bool + config STACKDEPOT bool select STACKTRACE -- GitLab From 3599fe12a125fa7118da2bcc5033d7741fb5f3a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2019 11:45:22 +0200 Subject: [PATCH 1705/1861] x86/stacktrace: Use common infrastructure Replace the stack_trace_save*() functions with the new arch_stack_walk() interfaces. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: linux-arch@vger.kernel.org Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: David Rientjes Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Cc: Akinobu Mita Cc: Christoph Hellwig Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Cc: Marek Szyprowski Cc: Johannes Thumshirn Cc: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer Cc: Alasdair Kergon Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: dri-devel@lists.freedesktop.org Cc: David Airlie Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tom Zanussi Cc: Miroslav Benes Link: https://lkml.kernel.org/r/20190425094803.816485461@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/stacktrace.c | 116 ++++++----------------------------- 2 files changed, 20 insertions(+), 97 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19c..b5978e35a8a82 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -74,6 +74,7 @@ config X86 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_STACKWALK select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b2f706f1e0b7a..2abf27d7df6b8 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -12,75 +12,31 @@ #include #include -static int save_stack_address(struct stack_trace *trace, unsigned long addr, - bool nosched) -{ - if (nosched && in_sched_functions(addr)) - return 0; - - if (trace->skip > 0) { - trace->skip--; - return 0; - } - - if (trace->nr_entries >= trace->max_entries) - return -1; - - trace->entries[trace->nr_entries++] = addr; - return 0; -} - -static void noinline __save_stack_trace(struct stack_trace *trace, - struct task_struct *task, struct pt_regs *regs, - bool nosched) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { struct unwind_state state; unsigned long addr; - if (regs) - save_stack_address(trace, regs->ip, nosched); + if (regs && !consume_entry(cookie, regs->ip, false)) + return; for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); - if (!addr || save_stack_address(trace, addr, nosched)) + if (!addr || !consume_entry(cookie, addr, false)) break; } } /* - * Save stack-backtrace addresses into a stack_trace buffer. + * This function returns an error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. 
*/ -void save_stack_trace(struct stack_trace *trace) -{ - trace->skip++; - __save_stack_trace(trace, current, NULL, false); -} -EXPORT_SYMBOL_GPL(save_stack_trace); - -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - __save_stack_trace(trace, current, regs, false); -} - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) - trace->skip++; - __save_stack_trace(trace, tsk, NULL, true); - - put_task_stack(tsk); -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE - -static int __always_inline -__save_stack_trace_reliable(struct stack_trace *trace, - struct task_struct *task) +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { struct unwind_state state; struct pt_regs *regs; @@ -117,7 +73,7 @@ __save_stack_trace_reliable(struct stack_trace *trace, if (!addr) return -EINVAL; - if (save_stack_address(trace, addr, false)) + if (!consume_entry(cookie, addr, false)) return -EINVAL; } @@ -132,32 +88,6 @@ __save_stack_trace_reliable(struct stack_trace *trace, return 0; } -/* - * This function returns an error if it detects any unreliable features of the - * stack. Otherwise it guarantees that the stack trace is reliable. - * - * If the task is not 'current', the caller *must* ensure the task is inactive. - */ -int save_stack_trace_tsk_reliable(struct task_struct *tsk, - struct stack_trace *trace) -{ - int ret; - - /* - * If the task doesn't have a stack (e.g., a zombie), the stack is - * "reliably" empty. - */ - if (!try_get_task_stack(tsk)) - return 0; - - ret = __save_stack_trace_reliable(trace, tsk); - - put_task_stack(tsk); - - return ret; -} -#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ - /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ struct stack_frame_user { @@ -182,15 +112,15 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) return ret; } -static inline void __save_stack_trace_user(struct stack_trace *trace) +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) { - const struct pt_regs *regs = task_pt_regs(current); const void __user *fp = (const void __user *)regs->bp; - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = regs->ip; + if (!consume_entry(cookie, regs->ip, false)) + return; - while (trace->nr_entries < trace->max_entries) { + while (1) { struct stack_frame_user frame; frame.next_fp = NULL; @@ -200,8 +130,8 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) if ((unsigned long)fp < regs->sp) break; if (frame.ret_addr) { - trace->entries[trace->nr_entries++] = - frame.ret_addr; + if (!consume_entry(cookie, frame.ret_addr, false)) + return; } if (fp == frame.next_fp) break; @@ -209,11 +139,3 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) } } -void save_stack_trace_user(struct stack_trace *trace) -{ - /* - * Trace user stack if we are not a kernel thread - */ - if (current->mm) - __save_stack_trace_user(trace); -} -- GitLab From dbcdae185a704068c22984d6d05acc140ec03a8f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 29 Apr 2019 10:27:10 +0200 Subject: [PATCH 1706/1861] Documentation: kvm: fix dirty log ioctl arch lists KVM_GET_DIRTY_LOG is implemented by all architectures, not just x86, and KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is additionally implemented by arm, arm64, and mips. 
Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 67068c47c591a..7f3ebc9e7cee8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -321,7 +321,7 @@ cpu's hardware control block. 4.8 KVM_GET_DIRTY_LOG (vm ioctl) Capability: basic -Architectures: x86 +Architectures: all Type: vm ioctl Parameters: struct kvm_dirty_log (in/out) Returns: 0 on success, -1 on error @@ -3810,7 +3810,7 @@ to I/O ports. 4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl) Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT -Architectures: x86 +Architectures: x86, arm, arm64, mips Type: vm ioctl Parameters: struct kvm_dirty_log (in) Returns: 0 on success, -1 on error @@ -4799,7 +4799,7 @@ and injected exceptions. 7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT -Architectures: all +Architectures: x86, arm, arm64, mips Parameters: args[0] whether feature should be enabled or not With this capability enabled, KVM_GET_DIRTY_LOG will not automatically -- GitLab From bc15cf701fa4875d9710f16ca4d2ce25e66ce4a0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Apr 2019 14:21:11 +0100 Subject: [PATCH 1707/1861] arm64: Kconfig: Tidy up errata workaround help text The nature of silicon errata means that the Kconfig help text for our various software workarounds has been written by many different people. Along the way, we've accumulated typos and inconsistencies which make the options needlessly difficult to read. Fix up minor issues with the help text. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4791d48571244..d28f12e9c160f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -298,7 +298,7 @@ menu "Kernel Features" menu "ARM errata workarounds via the alternatives framework" config ARM64_WORKAROUND_CLEAN_CACHE - def_bool n + bool config ARM64_ERRATUM_826319 bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted" @@ -465,13 +465,13 @@ config ARM64_ERRATUM_1024718 bool "Cortex-A55: 1024718: Update of DBM/AP bits without break before make might result in incorrect update" default y help - This option adds work around for Arm Cortex-A55 Erratum 1024718. + This option adds a workaround for ARM Cortex-A55 Erratum 1024718. Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect update of the hardware dirty bit when the DBM/AP bits are updated - without a break-before-make. The work around is to disable the usage + without a break-before-make. The workaround is to disable the usage of hardware DBM locally on the affected cores. CPUs not affected by - erratum will continue to use the feature. + this erratum will continue to use the feature. If unsure, say Y. @@ -480,7 +480,7 @@ config ARM64_ERRATUM_1188873 default y select ARM_ARCH_TIMER_OOL_WORKAROUND help - This option adds work arounds for ARM Cortex-A76 erratum 1188873 + This option adds a workaround for ARM Cortex-A76 erratum 1188873. 
Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause register corruption when accessing the timer registers from @@ -492,7 +492,7 @@ config ARM64_ERRATUM_1165522 bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y help - This option adds work arounds for ARM Cortex-A76 erratum 1165522 + This option adds a workaround for ARM Cortex-A76 erratum 1165522. Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could end-up with corrupted TLBs by speculating an AT instruction during a guest @@ -505,7 +505,7 @@ config ARM64_ERRATUM_1286807 default y select ARM64_WORKAROUND_REPEAT_TLBI help - This option adds workaround for ARM Cortex-A76 erratum 1286807 + This option adds a workaround for ARM Cortex-A76 erratum 1286807. On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual address for a cacheable mapping of a location is being @@ -522,10 +522,10 @@ config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y help - Enable workaround for erratum 22375, 24313. + Enable workaround for errata 22375 and 24313. This implements two gicv3-its errata workarounds for ThunderX. Both - with small impact affecting only ITS table allocation. + with a small impact affecting only ITS table allocation. erratum 22375: only alloc 8MB table size erratum 24313: ignore memory access type @@ -589,9 +589,6 @@ config QCOM_FALKOR_ERRATUM_1003 config ARM64_WORKAROUND_REPEAT_TLBI bool - help - Enable the repeat TLBI workaround for Falkor erratum 1009 and - Cortex-A76 erratum 1286807. config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" @@ -627,7 +624,7 @@ config HISILICON_ERRATUM_161600802 bool "Hip07 161600802: Erroneous redistributor VLPI base" default y help - The HiSilicon Hip07 SoC usees the wrong redistributor base + The HiSilicon Hip07 SoC uses the wrong redistributor base when issued ITS commands such as VMOVP and VMAPP, and requires a 128kB offset to be applied to the target address in this commands. @@ -647,7 +644,7 @@ config FUJITSU_ERRATUM_010001 bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" default y help - This option adds workaround for Fujitsu-A64FX erratum E#010001. + This option adds a workaround for Fujitsu-A64FX erratum E#010001. On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory accesses may cause undefined fault (Data abort, DFSC=0b111111). This fault occurs under a specific hardware condition when a @@ -658,7 +655,7 @@ config FUJITSU_ERRATUM_010001 case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. The workaround is to ensure these bits are clear in TCR_ELx. - The workaround only affect the Fujitsu-A64FX. + The workaround only affects the Fujitsu-A64FX. If unsure, say Y. @@ -1408,7 +1405,7 @@ config ARM64_PSEUDO_NMI help Adds support for mimicking Non-Maskable Interrupts through the use of GIC interrupt priority. This support requires version 3 or later of - Arm GIC. + ARM GIC. This high priority configuration for interrupts needs to be explicitly enabled by setting the kernel parameter -- GitLab From d671002be6bdd7f77a771e23bf3e95d1f16775e6 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 29 Apr 2019 20:26:31 +0800 Subject: [PATCH 1708/1861] locking/lockdep: Remove unnecessary unlikely() DEBUG_LOCKS_WARN_ON() already contains an unlikely(), there is no need for another one. Signed-off-by: zhengbin Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: houtao1@huawei.com Link: http://lkml.kernel.org/r/1556540791-23110-1-git-send-email-zhengbin13@huawei.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 25ecc6d3058b1..6426d071a3242 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3256,7 +3256,7 @@ void lockdep_hardirqs_on(unsigned long ip) /* * See the fine text that goes along with this variable definition. */ - if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) + if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled)) return; /* -- GitLab From 38faed150438be8d6e419137209d25439e6f4c33 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 26 Mar 2019 13:57:28 -0700 Subject: [PATCH 1709/1861] ath10k: perform crash dump collection in workqueue Commit 25733c4e67df ("ath10k: pci: use mutex for diagnostic window CE polling") introduced a regression where we try to sleep (grab a mutex) in an atomic context: [ 233.602619] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:254 [ 233.602626] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 [ 233.602636] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.1.0-rc2 #4 [ 233.602642] Hardware name: Google Scarlet (DT) [ 233.602647] Call trace: [ 233.602663] dump_backtrace+0x0/0x11c [ 233.602672] show_stack+0x20/0x28 [ 233.602681] dump_stack+0x98/0xbc [ 233.602690] ___might_sleep+0x154/0x16c [ 233.602696] __might_sleep+0x78/0x88 [ 233.602704] mutex_lock+0x2c/0x5c [ 233.602717] ath10k_pci_diag_read_mem+0x68/0x21c [ath10k_pci] [ 233.602725] ath10k_pci_diag_read32+0x48/0x74 [ath10k_pci] [ 233.602733] ath10k_pci_dump_registers+0x5c/0x16c [ath10k_pci] [ 233.602741] ath10k_pci_fw_crashed_dump+0xb8/0x548 [ath10k_pci] [ 233.602749] ath10k_pci_napi_poll+0x60/0x128 [ath10k_pci] [ 233.602757] net_rx_action+0x140/0x388 [ 233.602766] __do_softirq+0x1b0/0x35c [...] ath10k_pci_fw_crashed_dump() is called from NAPI contexts, and firmware memory dumps are retrieved using the diag memory interface. A simple reproduction case is to run this on QCA6174A / WLAN.RM.4.4.1-00132-QCARMSWP-1, which happens to be a way to b0rk the firmware: dd if=/sys/kernel/debug/ieee80211/phy0/ath10k/mem_value bs=4K count=1 of=/dev/null (NB: simulated firmware crashes, via debugfs, don't trigger firmware dumps.) The fix is to move the crash-dump into a workqueue context, and avoid relying on 'data_lock' for most mutual exclusion. We only keep using it here for protecting 'fw_crash_counter', while the rest of the coredump buffers are protected by a new 'dump_mutex'. I've tested the above with simulated firmware crashes (debugfs 'reset' file), real firmware crashes (the 'dd' command above), and a variety of reboot and suspend/resume configurations on QCA6174A. 
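The shape of the fix is the standard defer-to-workqueue pattern for work that may sleep but is triggered from atomic (NAPI) context; a sketch with illustrative names (the real code in the diff below uses ar_pci->dump_work and ar->dump_mutex):

    struct my_dev {                         /* hypothetical device state */
        struct work_struct dump_work;
        struct mutex dump_mutex;
    };

    static void my_dump_work(struct work_struct *work)
    {
        struct my_dev *d = container_of(work, struct my_dev, dump_work);

        mutex_lock(&d->dump_mutex);         /* sleeping is allowed here */
        /* ... collect registers and memory dump ... */
        mutex_unlock(&d->dump_mutex);
    }

    /* setup: */
    INIT_WORK(&d->dump_work, my_dump_work);

    /* from NAPI/IRQ context, only schedule the work: */
    schedule_work(&d->dump_work);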
Reported here: http://lkml.kernel.org/linux-wireless/20190325202706.GA68720@google.com Fixes: 25733c4e67df ("ath10k: pci: use mutex for diagnostic window CE polling") Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/ce.c | 2 +- drivers/net/wireless/ath/ath10k/core.c | 1 + drivers/net/wireless/ath/ath10k/core.h | 3 +++ drivers/net/wireless/ath/ath10k/coredump.c | 6 +++--- drivers/net/wireless/ath/ath10k/pci.c | 24 +++++++++++++++++----- drivers/net/wireless/ath/ath10k/pci.h | 2 ++ 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 24b983edb3575..eca87f7c5b6c1 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -1855,7 +1855,7 @@ void ath10k_ce_dump_registers(struct ath10k *ar, struct ath10k_ce_crash_data ce_data; u32 addr, id; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); ath10k_err(ar, "Copy Engine register dump:\n"); diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 835b8de92d55e..aff585658fc0f 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -3119,6 +3119,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, goto err_free_wq; mutex_init(&ar->conf_mutex); + mutex_init(&ar->dump_mutex); spin_lock_init(&ar->data_lock); INIT_LIST_HEAD(&ar->peers); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index e08a17b01e035..e35aae5146f10 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -1063,6 +1063,9 @@ struct ath10k { /* prevents concurrent FW reconfiguration */ struct mutex conf_mutex; + /* protects coredump data */ + struct mutex dump_mutex; + /* protects shared structure data */ spinlock_t data_lock; diff --git a/drivers/net/wireless/ath/ath10k/coredump.c b/drivers/net/wireless/ath/ath10k/coredump.c index 33838d9c1cb60..45a355fb62b93 100644 --- a/drivers/net/wireless/ath/ath10k/coredump.c +++ b/drivers/net/wireless/ath/ath10k/coredump.c @@ -1102,7 +1102,7 @@ struct ath10k_fw_crash_data *ath10k_coredump_new(struct ath10k *ar) { struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); if (ath10k_coredump_mask == 0) /* coredump disabled */ @@ -1146,7 +1146,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar) if (!buf) return NULL; - spin_lock_bh(&ar->data_lock); + mutex_lock(&ar->dump_mutex); dump_data = (struct ath10k_dump_file_data *)(buf); strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP", @@ -1213,7 +1213,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar) sofar += sizeof(*dump_tlv) + crash_data->ramdump_buf_len; } - spin_unlock_bh(&ar->data_lock); + mutex_unlock(&ar->dump_mutex); return dump_data; } diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 271f92c24d446..2c27f407a851f 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1441,7 +1441,7 @@ static void ath10k_pci_dump_registers(struct ath10k *ar, __le32 reg_dump_values[REG_DUMP_COUNT_QCA988X] = {}; int i, ret; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); ret = ath10k_pci_diag_read_hi(ar, ®_dump_values[0], hi_failure_state, @@ -1656,7 +1656,7 @@ 
static void ath10k_pci_dump_memory(struct ath10k *ar, int ret, i; u8 *buf; - lockdep_assert_held(&ar->data_lock); + lockdep_assert_held(&ar->dump_mutex); if (!crash_data) return; @@ -1734,14 +1734,19 @@ static void ath10k_pci_dump_memory(struct ath10k *ar, } } -static void ath10k_pci_fw_crashed_dump(struct ath10k *ar) +static void ath10k_pci_fw_dump_work(struct work_struct *work) { + struct ath10k_pci *ar_pci = container_of(work, struct ath10k_pci, + dump_work); struct ath10k_fw_crash_data *crash_data; + struct ath10k *ar = ar_pci->ar; char guid[UUID_STRING_LEN + 1]; - spin_lock_bh(&ar->data_lock); + mutex_lock(&ar->dump_mutex); + spin_lock_bh(&ar->data_lock); ar->stats.fw_crash_counter++; + spin_unlock_bh(&ar->data_lock); crash_data = ath10k_coredump_new(ar); @@ -1756,11 +1761,18 @@ static void ath10k_pci_fw_crashed_dump(struct ath10k *ar) ath10k_ce_dump_registers(ar, crash_data); ath10k_pci_dump_memory(ar, crash_data); - spin_unlock_bh(&ar->data_lock); + mutex_unlock(&ar->dump_mutex); queue_work(ar->workqueue, &ar->restart_work); } +static void ath10k_pci_fw_crashed_dump(struct ath10k *ar) +{ + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + + queue_work(ar->workqueue, &ar_pci->dump_work); +} + void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, int force) { @@ -3442,6 +3454,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) spin_lock_init(&ar_pci->ps_lock); mutex_init(&ar_pci->ce_diag_mutex); + INIT_WORK(&ar_pci->dump_work, ath10k_pci_fw_dump_work); + timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0); if (QCA_REV_6174(ar) || QCA_REV_9377(ar)) diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 3773c79f322f5..4455ed6c5275a 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -121,6 +121,8 @@ struct ath10k_pci { /* For protecting ce_diag */ struct mutex ce_diag_mutex; + struct work_struct dump_work; + struct ath10k_ce ce; struct timer_list rx_post_retry; -- GitLab From 9e80ad37f6788ed52b89a3cfcd593e0aa69b216d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 3 Mar 2019 18:24:33 +0100 Subject: [PATCH 1710/1861] ath10k: Drop WARN_ON()s that always trigger during system resume ath10k_mac_vif_chan() always returns an error for the given vif during system-wide resume which reliably triggers two WARN_ON()s in ath10k_bss_info_changed() and they are not particularly useful in that code path, so drop them. Tested: QCA6174 hw3.2 PCI with WLAN.RM.2.0-00180-QCARMSWPZ-1 Tested: QCA6174 hw3.2 SDIO with WLAN.RMH.4.4.1-00007-QCARMSWP-1 Fixes: cd93b83ad927 ("ath10k: support for multicast rate control") Fixes: f279294e9ee2 ("ath10k: add support for configuring management packet rate") Cc: stable@vger.kernel.org Reviewed-by: Brian Norris Tested-by: Brian Norris Tested-by: Claire Chang Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 41e89db244d20..9c703d287333e 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5774,7 +5774,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_MCAST_RATE && - !WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) { + !ath10k_mac_vif_chan(arvif->vif, &def)) { band = def.chan->band; rateidx = vif->bss_conf.mcast_rate[band] - 1; @@ -5812,7 +5812,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_BASIC_RATES) { - if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) { + if (ath10k_mac_vif_chan(vif, &def)) { mutex_unlock(&ar->conf_mutex); return; } -- GitLab From f08cae2f28db24d95be5204046b60618d8de4ddc Mon Sep 17 00:00:00 2001 From: Boyang Zhou Date: Mon, 29 Apr 2019 15:27:19 +0100 Subject: [PATCH 1711/1861] arm64: mmap: Ensure file offset is treated as unsigned The file offset argument to the arm64 sys_mmap() implementation is scaled from bytes to pages by shifting right by PAGE_SHIFT. Unfortunately, the offset is passed in as a signed 'off_t' type and therefore large offsets (i.e. with the top bit set) are incorrectly sign-extended by the shift. This has been observed to cause false mmap() failures when mapping GPU doorbells on an arm64 server part. Change the type of the file offset argument to sys_mmap() from 'off_t' to 'unsigned long' so that the shifting scales the value as expected. Cc: Signed-off-by: Boyang Zhou [will: rewrote commit message] Signed-off-by: Will Deacon --- arch/arm64/kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index b44065fb16160..6f91e81165147 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -31,7 +31,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, off) + unsigned long, fd, unsigned long, off) { if (offset_in_page(off) != 0) return -EINVAL; -- GitLab From dfbd199a7cfe3e3cd8531e1353cdbd7175bfbc5e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 24 Feb 2019 21:55:28 -0300 Subject: [PATCH 1712/1861] selinux: use kernel linux/socket.h for genheaders and mdp When compiling genheaders and mdp from a newer host kernel, the following error happens: In file included from scripts/selinux/genheaders/genheaders.c:18: ./security/selinux/include/classmap.h:238:2: error: #error New address family defined, please update secclass_map. #error New address family defined, please update secclass_map. ^~~~~ make[3]: *** [scripts/Makefile.host:107: scripts/selinux/genheaders/genheaders] Error 1 make[2]: *** [scripts/Makefile.build:599: scripts/selinux/genheaders] Error 2 make[1]: *** [scripts/Makefile.build:599: scripts/selinux] Error 2 make[1]: *** Waiting for unfinished jobs.... Instead of relying on the host definition, include linux/socket.h in classmap.h to have PF_MAX. 
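The build failure comes from a guard of this form in classmap.h (the exact PF_MAX bound differs between kernel versions; this is a sketch):

    #include <linux/socket.h>       /* the kernel's PF_MAX, not the host's */

    #if PF_MAX > 44
    #error New address family defined, please update secclass_map.
    #endif

Compiling the host tools against the host's newer socket.h made PF_MAX exceed the bound, hence the spurious error.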
Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara Acked-by: Stephen Smalley [PM: manually merge in mdp.c, subject line tweaks] Signed-off-by: Paul Moore --- scripts/selinux/genheaders/genheaders.c | 1 - scripts/selinux/mdp/mdp.c | 1 - security/selinux/include/classmap.h | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 1ceedea847ddf..544ca126a8a8c 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -9,7 +9,6 @@ #include #include #include -#include struct security_class_mapping { const char *name; diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index 073fe7537f6c0..6d51b74bc679e 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -32,7 +32,6 @@ #include #include #include -#include static void usage(char *name) { diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index bd5fe0d3204ae..201f7e588a29d 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append", "map" -- GitLab From 2f1d4e24d91b1f475f939685e19bb1e537031661 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 26 Apr 2019 10:16:36 +0800 Subject: [PATCH 1713/1861] firmware: arm_sdei: Prohibit probing in '_sdei_handler' Functions called in '_sdei_handler' are needed to be marked as 'nokprobe'. Because these functions are called in NMI context and neither the arch-code's debug infrastructure nor kprobes core supports this. Signed-off-by: Xiongfeng Wang Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index e6376f985ef7b..9cd70d1a56221 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -165,6 +165,7 @@ static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, return err; } +NOKPROBE_SYMBOL(invoke_sdei_fn); static struct sdei_event *sdei_event_find(u32 event_num) { @@ -879,6 +880,7 @@ static void sdei_smccc_smc(unsigned long function_id, { arm_smccc_smc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); } +NOKPROBE_SYMBOL(sdei_smccc_smc); static void sdei_smccc_hvc(unsigned long function_id, unsigned long arg0, unsigned long arg1, @@ -887,6 +889,7 @@ static void sdei_smccc_hvc(unsigned long function_id, { arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); } +NOKPROBE_SYMBOL(sdei_smccc_hvc); int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, sdei_event_callback *critical_cb) -- GitLab From 6a5c5d26c4c6c3cc486fef0bf04ff9551132611b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Apr 2019 09:48:53 -0700 Subject: [PATCH 1714/1861] rdma: fix build errors on s390 and MIPS due to bad ZERO_PAGE use The parameter to ZERO_PAGE() was wrong, but since all architectures except for MIPS and s390 ignore it, it wasn't noticed until 0-day reported the build error. 
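For context, most architectures define the macro so that the argument is never evaluated, which is why only MIPS and s390 (whose coloured zero pages are keyed on the address) caught the bad parameter. Roughly:

    /* typical definition, argument ignored: */
    #define ZERO_PAGE(vaddr)  (virt_to_page(empty_zero_page))

The follow-up x86 patch below at least evaluates the argument via the comma operator, so obviously invalid uses fail to compile:

    #define ZERO_PAGE(vaddr)  ((void)(vaddr), virt_to_page(empty_zero_page))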
Fixes: 67f269b37f9b ("RDMA/ucontext: Fix regression with disassociate") Cc: stable@vger.kernel.org Cc: Andrea Arcangeli Cc: Leon Romanovsky Cc: Jason Gunthorpe Signed-off-by: Linus Torvalds --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 7843e89235c34..c489f545baaee 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -895,7 +895,7 @@ static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) /* Read only pages can just use the system zero page. */ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { - vmf->page = ZERO_PAGE(vmf->vm_start); + vmf->page = ZERO_PAGE(vmf->address); get_page(vmf->page); return 0; } -- GitLab From 80871482fd5cb1cb396ea232237a7d9c540854f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Apr 2019 09:51:29 -0700 Subject: [PATCH 1715/1861] x86: make ZERO_PAGE() at least parse its argument This doesn't really do anything, but at least we now parse the ZERO_PAGE() address argument so that we'll catch the most obvious errors in usage the next time they happen. See commit 6a5c5d26c4c6 ("rdma: fix build errors on s390 and MIPS due to bad ZERO_PAGE use") for what happens when we don't have any use of the macro argument at all. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2779ace16d23f..50b3e2d963c9a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -46,7 +46,7 @@ void ptdump_walk_user_pgd_level_checkwx(void); */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; -- GitLab From 95c169251bf734aa555a1e8043e4d88ec97a04ec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 25 Apr 2019 12:06:54 -0400 Subject: [PATCH 1716/1861] ipv6: invert flowlabel sharing check in process and user mode A request for a flowlabel in process or user exclusive mode must fail if the caller pid or uid does not match. Invert the test. Previously, the test was unsafe wrt PID recycling, but indeed tested for inequality: fl1->owner != fl->owner Fixes: 4f82f45730c68 ("net ip6 flowlabel: Make owner a union of struct pid* and kuid_t") Signed-off-by: Willem de Bruijn Signed-off-by: David S.
Miller --- net/ipv6/ip6_flowlabel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index cb54a8a3c2735..a05036bc808d5 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -633,9 +633,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; -- GitLab From ca2fe2956acef2f87f6c55549874fdd2e92d9824 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Apr 2019 10:10:05 -0700 Subject: [PATCH 1717/1861] tcp: add sanity tests in tcp_add_backlog() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard and Bruno both reported that my commit added a bug, and Bruno was able to determine the problem came when a segment wih a FIN packet was coalesced to a prior one in tcp backlog queue. It turns out the header prediction in tcp_rcv_established() looks back to TCP headers in the packet, not in the metadata (aka TCP_SKB_CB(skb)->tcp_flags) The fast path in tcp_rcv_established() is not supposed to handle a FIN flag (it does not call tcp_fin()) Therefore we need to make sure to propagate the FIN flag, so that the coalesced packet does not go through the fast path, the same than a GRO packet carrying a FIN flag. While we are at it, make sure we do not coalesce packets with RST or SYN, or if they do not have ACK set. Many thanks to Richard and Bruno for pinpointing the bad commit, and to Richard for providing a first version of the fix. Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue") Signed-off-by: Eric Dumazet Reported-by: Richard Purdie Reported-by: Bruno Prémont Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2f8039a26b08f..a2896944aa377 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || ((TCP_SKB_CB(tail)->tcp_flags | - TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) || + TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || + !((TCP_SKB_CB(tail)->tcp_flags & + TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || #ifdef CONFIG_TLS_DEVICE @@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established() + * is not entered if we append a packet with a FIN. + * SYN, RST, URG are not present. + * ACK is set on both packets. + * PSH : we do not really care in TCP stack, + * at least for 'GRO' packets. 
+ */ + thtail->fin |= th->fin; TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; if (TCP_SKB_CB(skb)->has_rxtstamp) { -- GitLab From 1d3fd8a10bedb09006cfc963bfcf051c3021f626 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Sat, 27 Apr 2019 09:14:33 -0400 Subject: [PATCH 1718/1861] vrf: Use orig netdev to count Ip6InNoRoutes and a fresh route lookup when sending dest unreach When there is no route to an IPv6 dest addr, skb_dst(skb) points to the loopback dev in the case that IP6CB(skb)->iif is enslaved to a vrf. This causes Ip6InNoRoutes to be incremented on the loopback dev. This also causes the lookup in icmpv6_send() to fail, so the dest unreachable is not sent, and Ip6OutNoRoutes gets incremented on the loopback dev. To reproduce: * Gateway configuration: ip link add dev vrf_258 type vrf table 258 ip link set dev enp0s9 master vrf_258 ip addr add 66::1/64 dev enp0s9 ip -6 route add unreachable default metric 8192 table 258 sysctl -w net.ipv6.conf.all.forwarding=1 sysctl -w net.ipv6.conf.enp0s9.forwarding=1 * Sender configuration: ip addr add 66::2/64 dev enp0s9 ip -6 route add default via 66::1 and ping 67::1 for example from the sender. Fix this by counting on the original netdev and by resetting the skb dst to force a fresh lookup. v2: Fix typo of destination address in the repro steps. v3: Simplify the loopback check (per David Ahern) and use reverse Christmas tree format (per David Miller). Signed-off-by: Stephen Suryaputra Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7178e32eb15d0..b4899f0de0d06 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3668,23 +3668,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { - int type; struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; + int type; + + if (netif_is_l3_master(skb->dev) && + dst->dev == net->loopback_dev) + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); + else + idev = ip6_dst_idev(dst); + switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), - __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), - ipstats_mib_noroutes); + IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } + + /* Start over by dropping the dst for l3mdev case */ + if (netif_is_l3_master(skb->dev)) + skb_dst_drop(skb); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; -- GitLab From 6c0afef5fb0c27758f4d52b2210c61b6bd8b4470 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Apr 2019 16:49:06 -0700 Subject: [PATCH 1719/1861] ipv6/flowlabel: wait rcu grace period before put_pid() syzbot was able to catch a use-after-free read in pid_nr_ns() [1] ip6fl_seq_show() seems to use RCU protection, dereferencing fl->owner.pid, but fl_free() releases fl->owner.pid before the RCU grace period is started.
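In other words, put_pid() may only run once every RCU reader that could still be dereferencing fl->owner.pid has finished. A minimal sketch of the deferred-free shape the fix adopts (mirroring the fl_free_rcu() helper in the diff further below; the flowlabel struct is abbreviated to the fields relevant to the bug):

#include <linux/in6.h>
#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Abbreviated stand-in for struct ip6_flowlabel: only the fields that
 * matter for the lifetime bug are kept. */
struct fl_sketch {
	struct rcu_head rcu;
	int share;
	struct pid *owner_pid;
};

/* Invoked by the RCU core only after a full grace period, i.e. once
 * every pre-existing rcu_read_lock() section (such as ip6fl_seq_show())
 * has exited and can no longer dereference the pid. */
static void fl_free_rcu_sketch(struct rcu_head *head)
{
	struct fl_sketch *fl = container_of(head, struct fl_sketch, rcu);

	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner_pid);	/* safe: no readers remain */
	kfree(fl);
}

static void fl_free_sketch(struct fl_sketch *fl)
{
	if (fl)
		call_rcu(&fl->rcu, fl_free_rcu_sketch);	/* defer the whole teardown */
}

The syzbot report referenced above as [1] follows.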
[1] BUG: KASAN: use-after-free in pid_nr_ns+0x128/0x140 kernel/pid.c:407 Read of size 4 at addr ffff888094012a04 by task syz-executor.0/18087 CPU: 0 PID: 18087 Comm: syz-executor.0 Not tainted 5.1.0-rc6+ #89 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131 pid_nr_ns+0x128/0x140 kernel/pid.c:407 ip6fl_seq_show+0x2f8/0x4f0 net/ipv6/ip6_flowlabel.c:794 seq_read+0xad3/0x1130 fs/seq_file.c:268 proc_reg_read+0x1fe/0x2c0 fs/proc/inode.c:227 do_loop_readv_writev fs/read_write.c:701 [inline] do_loop_readv_writev fs/read_write.c:688 [inline] do_iter_read+0x4a9/0x660 fs/read_write.c:922 vfs_readv+0xf0/0x160 fs/read_write.c:984 kernel_readv fs/splice.c:358 [inline] default_file_splice_read+0x475/0x890 fs/splice.c:413 do_splice_to+0x12a/0x190 fs/splice.c:876 splice_direct_to_actor+0x2d2/0x970 fs/splice.c:953 do_splice_direct+0x1da/0x2a0 fs/splice.c:1062 do_sendfile+0x597/0xd00 fs/read_write.c:1443 __do_sys_sendfile64 fs/read_write.c:1498 [inline] __se_sys_sendfile64 fs/read_write.c:1490 [inline] __x64_sys_sendfile64+0x15a/0x220 fs/read_write.c:1490 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458da9 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f300d24bc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 0000000000458da9 RDX: 00000000200000c0 RSI: 0000000000000008 RDI: 0000000000000007 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000005a R11: 0000000000000246 R12: 00007f300d24c6d4 R13: 00000000004c5fa3 R14: 00000000004da748 R15: 00000000ffffffff Allocated by task 17543: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc mm/kasan/common.c:497 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:470 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:505 slab_post_alloc_hook mm/slab.h:437 [inline] slab_alloc mm/slab.c:3393 [inline] kmem_cache_alloc+0x11a/0x6f0 mm/slab.c:3555 alloc_pid+0x55/0x8f0 kernel/pid.c:168 copy_process.part.0+0x3b08/0x7980 kernel/fork.c:1932 copy_process kernel/fork.c:1709 [inline] _do_fork+0x257/0xfd0 kernel/fork.c:2226 __do_sys_clone kernel/fork.c:2333 [inline] __se_sys_clone kernel/fork.c:2327 [inline] __x64_sys_clone+0xbf/0x150 kernel/fork.c:2327 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 7789: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:459 kasan_slab_free+0xe/0x10 mm/kasan/common.c:467 __cache_free mm/slab.c:3499 [inline] kmem_cache_free+0x86/0x260 mm/slab.c:3765 put_pid.part.0+0x111/0x150 kernel/pid.c:111 put_pid+0x20/0x30 kernel/pid.c:105 fl_free+0xbe/0xe0 net/ipv6/ip6_flowlabel.c:102 ip6_fl_gc+0x295/0x3e0 net/ipv6/ip6_flowlabel.c:152 call_timer_fn+0x190/0x720 kernel/time/timer.c:1325 expire_timers kernel/time/timer.c:1362 [inline] __run_timers kernel/time/timer.c:1681 [inline] __run_timers kernel/time/timer.c:1649 [inline] run_timer_softirq+0x652/0x1700 
kernel/time/timer.c:1694 __do_softirq+0x266/0x95a kernel/softirq.c:293 The buggy address belongs to the object at ffff888094012a00 which belongs to the cache pid_2 of size 88 The buggy address is located 4 bytes inside of 88-byte region [ffff888094012a00, ffff888094012a58) The buggy address belongs to the page: page:ffffea0002500480 count:1 mapcount:0 mapping:ffff88809a483080 index:0xffff888094012980 flags: 0x1fffc0000000200(slab) raw: 01fffc0000000200 ffffea00018a3508 ffffea0002524a88 ffff88809a483080 raw: ffff888094012980 ffff888094012000 000000010000001b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888094012900: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff888094012980: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc >ffff888094012a00: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ^ ffff888094012a80: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc ffff888094012b00: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc Fixes: 4f82f45730c6 ("net ip6 flowlabel: Make owner a union of struct pid * and kuid_t") Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a05036bc808d5..be5f3d7ceb966 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) -- GitLab From 9a4f26cc98d81b67ecc23b890c28e2df324e29f3 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Tue, 30 Apr 2019 10:11:44 +1000 Subject: [PATCH 1720/1861] sched/cpufreq: Fix kobject memleak Currently the error return path from kobject_init_and_add() is not followed by a call to kobject_put() - which means we are leaking the kobject. Fix it by adding a call to kobject_put() in the error path of kobject_init_and_add(). Signed-off-by: Tobin C. Harding Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Tobin C. Harding Cc: Vincent Guittot Cc: Viresh Kumar Link: http://lkml.kernel.org/r/20190430001144.24890-1-tobin@kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5c41ea3674223..3638d2377e3c6 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -771,6 +771,7 @@ out: return 0; fail: + kobject_put(&tunables->attr_set.kobj); policy->governor_data = NULL; sugov_tunables_free(tunables); -- GitLab From 4ebe36c94aed95de71a8ce6a6762226d31c938ee Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 30 Apr 2019 11:35:52 +0530 Subject: [PATCH 1721/1861] cpufreq: Fix kobject memleak Currently the error return path from kobject_init_and_add() is not followed by a call to kobject_put() - which means we are leaking the kobject. 
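The failure mode is generic to kobject users: once kobject_init_and_add() has been called, the kobject's refcount is live even when the call fails, and only kobject_put() may drop it, since that is what invokes the ktype's release() callback. A minimal sketch of the corrected error path (the function and the name format string are illustrative, not taken from this patch):

#include <linux/kobject.h>

/* Illustrative only: register a kobject and clean up correctly on error. */
static int example_add_kobject(struct kobject *kobj, struct kobj_type *ktype,
			       struct kobject *parent)
{
	int ret;

	ret = kobject_init_and_add(kobj, ktype, parent, "example%d", 0);
	if (ret)
		/* Do not kfree() here: the refcount is already live, so only
		 * kobject_put() may release it via ktype->release(). */
		kobject_put(kobj);

	return ret;
}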
Fix it by adding a call to kobject_put() in the error path of kobject_init_and_add(). Signed-off-by: Viresh Kumar Reviewed-by: Tobin C. Harding Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + drivers/cpufreq/cpufreq_governor.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 92604afdeec44..7ea217c88c2ea 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1133,6 +1133,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) cpufreq_global_kobject, "policy%u", cpu); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); + kobject_put(&policy->kobj); goto err_free_real_cpus; } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index ffa9adeaba31b..9d1d9bf02710b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -459,6 +459,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) /* Failure, so roll back. */ pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); + kobject_put(&dbs_data->attr_set.kobj); + policy->governor_data = NULL; if (!have_governor_per_policy()) -- GitLab From 75a19a0202db21638a1c2b424afb867e1f9a2376 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Apr 2019 17:26:22 +0100 Subject: [PATCH 1722/1861] arm64: arch_timer: Ensure counter register reads occur with seqlock held When executing clock_gettime(), either in the vDSO or via a system call, we need to ensure that the read of the counter register occurs within the seqlock reader critical section. This ensures that updates to the clocksource parameters (e.g. the multiplier) are consistent with the counter value and therefore avoids the situation where time appears to go backwards across multiple reads. Extend the vDSO logic so that the seqlock critical section covers the read of the counter register as well as accesses to the data page. Since reads of the counter system registers are not ordered by memory barrier instructions, introduce dependency ordering from the counter read to a subsequent memory access so that the seqlock memory barriers apply to the counter access in both the vDSO and the system call paths. Cc: Marc Zyngier Tested-by: Vincenzo Frascino Link: https://lore.kernel.org/linux-arm-kernel/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ Reported-by: Thomas Gleixner Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 33 +++++++++++++++++++++++++-- arch/arm64/kernel/vdso/gettimeofday.S | 15 ++++++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516cf..93e07512b4b61 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -148,18 +148,47 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner.
+ * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + static inline u64 arch_counter_get_cntpct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntpct_el0); + cnt = arch_timer_reg_read_stable(cntpct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } static inline u64 arch_counter_get_cntvct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntvct_el0); + cnt = arch_timer_reg_read_stable(cntvct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } +#undef arch_counter_enforce_ordering + static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 21805e4164831..856fee6d35129 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -73,6 +73,13 @@ x_tmp .req x8 movn x_tmp, #0xff00, lsl #48 and \res, x_tmp, \res mul \res, \res, \mult + /* + * Fake address dependency from the value computed from the counter + * register to subsequent data page accesses so that the sequence + * locking also orders the read of the counter. + */ + and x_tmp, \res, xzr + add vdso_data, vdso_data, x_tmp .endm /* @@ -147,12 +154,12 @@ ENTRY(__kernel_gettimeofday) /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=1b get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=1b get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -211,13 +218,13 @@ realtime: /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=realtime /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=realtime get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 clock_gettime_return, shift=1 @@ -231,7 +238,6 @@ monotonic: ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic /* All computations are done with left-shifted nsecs. */ lsl x4, x4, x12 @@ -239,6 +245,7 @@ monotonic: lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -253,13 +260,13 @@ monotonic_raw: /* w11 = cs_raw_mult, w12 = cs_shift */ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. 
*/ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic_raw get_ts_clock_raw res_sec=x10, res_nsec=x11, \ clock_nsec=x15, nsec_to_sec=x9 -- GitLab From 359db57c34af15edf35492944af4d4417f59886c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 29 Apr 2019 18:27:13 +0100 Subject: [PATCH 1723/1861] arm64: compat: Reduce address limit for 64K pages With the introduction of the config option that allows kuser helpers to be enabled, it is now possible to reduce TASK_SIZE_32 when these are disabled and 64K pages are enabled. This extends compliance with section 6.5.8 of the C standard (C99). Acked-by: Catalin Marinas Reported-by: Catalin Marinas Signed-off-by: Vincenzo Frascino Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 228af56ece480..fcd0e691b1ea2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -57,7 +57,7 @@ #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT -#ifdef CONFIG_ARM64_64K_PAGES +#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS) /* * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied * by the compat vectors page. -- GitLab From e836673c9b4966bc78e38aeda25f7022c57f0e90 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:21 -0700 Subject: [PATCH 1724/1861] x86/alternatives: Add text_poke_kgdb() to not assert the lock when debugging text_mutex is currently expected to be held before text_poke() is called, but kgdb does not take the mutex, and instead *supposedly* ensures the lock is not taken and will not be acquired by any other core while text_poke() is running. The reason for the "supposedly" comment is that it is not entirely clear that this would be the case if gdb_do_roundup is zero. Create two wrapper functions, text_poke() and text_poke_kgdb(), which do or do not run the lockdep assertion respectively. While we are at it, change the return code of text_poke() to something meaningful. One day, callers might actually respect it and the existing BUG_ON() when patching fails could be removed. For kgdb, the return value can actually be used. Suggested-by: Peter Zijlstra Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Acked-by: Jiri Kosina Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Fixes: 9222f606506c ("x86/alternatives: Lockdep-enforce text_mutex in text_poke*()") Link: https://lkml.kernel.org/r/20190426001143.4983-2-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/text-patching.h | 1 + arch/x86/kernel/alternative.c | 52 ++++++++++++++++++++-------- arch/x86/kernel/kgdb.c | 11 +++--- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index e85ff65c43c3e..f8fc8e86cf01b 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -35,6 +35,7 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); * inconsistent instruction while you patch.
*/ extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9a79c7808f9cc..0a814d73547a1 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -679,18 +679,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, return addr; } -/** - * text_poke - Update instructions on a live kernel - * @addr: address to modify - * @opcode: source of the copy - * @len: length to copy - * - * Only atomic text poke/set should be allowed when not doing early patching. - * It means the size must be writable atomically and the address must be aligned - * in a way that permits an atomic write. It also makes sure we fit on a single - * page. - */ -void *text_poke(void *addr, const void *opcode, size_t len) +static void *__text_poke(void *addr, const void *opcode, size_t len) { unsigned long flags; char *vaddr; @@ -703,8 +692,6 @@ void *text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(!after_bootmem); - lockdep_assert_held(&text_mutex); - if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); @@ -733,6 +720,43 @@ void *text_poke(void *addr, const void *opcode, size_t len) return addr; } +/** + * text_poke - Update instructions on a live kernel + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * + * Only atomic text poke/set should be allowed when not doing early patching. + * It means the size must be writable atomically and the address must be aligned + * in a way that permits an atomic write. It also makes sure we fit on a single + * page. + */ +void *text_poke(void *addr, const void *opcode, size_t len) +{ + lockdep_assert_held(&text_mutex); + + return __text_poke(addr, opcode, len); +} + +/** + * text_poke_kgdb - Update instructions on a live kernel by kgdb + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy + * + * Only atomic text poke/set should be allowed when not doing early patching. + * It means the size must be writable atomically and the address must be aligned + * in a way that permits an atomic write. It also makes sure we fit on a single + * page. + * + * Context: should only be used by kgdb, which ensures no other core is running, + * despite the fact it does not hold the text_mutex. + */ +void *text_poke_kgdb(void *addr, const void *opcode, size_t len) +{ + return __text_poke(addr, opcode, len); +} + static void do_sync_core(void *info) { sync_core(); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 4ff6b4cdb9419..2b203ee5b8796 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -759,13 +759,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (!err) return err; /* - * It is safe to call text_poke() because normal kernel execution + * It is safe to call text_poke_kgdb() because normal kernel execution * is stopped on all cores, so long as the text_mutex is not locked. 
*/ if (mutex_is_locked(&text_mutex)) return -EBUSY; - text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, - BREAK_INSTR_SIZE); + text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; @@ -784,12 +784,13 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (bpt->type != BP_POKE_BREAKPOINT) goto knl_write; /* - * It is safe to call text_poke() because normal kernel execution + * It is safe to call text_poke_kgdb() because normal kernel execution * is stopped on all cores, so long as the text_mutex is not locked. */ if (mutex_is_locked(&text_mutex)) goto knl_write; - text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); + text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr, + BREAK_INSTR_SIZE); err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; -- GitLab From 5932c9fd19e6e5ac84756c5c32fe5155d9a6b458 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:42 -0700 Subject: [PATCH 1725/1861] mm/tlb: Provide default nmi_uaccess_okay() x86 has an nmi_uaccess_okay(), but other architectures do not. Arch-independent code might need to know whether access to user addresses is ok in an NMI context or in other code whose execution context is unknown. Specifically, this function is needed for bpf_probe_write_user(). Add a default implementation of nmi_uaccess_okay() for architectures that do not have such a function. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-23-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 2 ++ include/asm-generic/tlb.h | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f4204bf377fcf..e9eae3d6ef025 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -274,6 +274,8 @@ static inline bool nmi_uaccess_okay(void) return true; } +#define nmi_uaccess_okay nmi_uaccess_okay + /* Initialize cr4 shadow for this CPU. */ static inline void cr4_init_shadow(void) { diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 6be86c1c5c583..075b353cae86d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -20,6 +20,15 @@ #include #include +/* + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or switching + * the loaded mm. + */ +#ifndef nmi_uaccess_okay +# define nmi_uaccess_okay() true +#endif + #ifdef CONFIG_MMU #ifdef CONFIG_HAVE_RCU_TABLE_FREE -- GitLab From c7b6f29b6257532792fc722b68fcc0e00b5a856c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:43 -0700 Subject: [PATCH 1726/1861] bpf: Fail bpf_probe_write_user() while mm is switched When using a temporary mm, bpf_probe_write_user() should not be able to write to user memory, since user memory addresses may be used to map kernel memory. Detect these cases and fail bpf_probe_write_user() in such cases.
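In effect the write path gains one more refusal. A condensed sketch of the resulting order of checks (abbreviated: the real function also tests task flags alongside in_interrupt(), as the diff below shows):

#include <linux/uaccess.h>

/* Sketch of the refusal order in bpf_probe_write_user() after this
 * change; the copy itself only proceeds when all checks pass. */
static int probe_write_checks_sketch(void __user *unsafe_ptr, u32 size)
{
	if (unlikely(in_interrupt()))		/* no usable user context */
		return -EPERM;
	if (unlikely(uaccess_kernel()))		/* KERNEL_DS: not a real user mm */
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))	/* task/mm switch in flight, e.g. a temporary mm */
		return -EPERM;
	if (!access_ok(unsafe_ptr, size))	/* basic user-range check */
		return -EPERM;
	return 0;
}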
Suggested-by: Jann Horn Reported-by: Jann Horn Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Daniel Borkmann Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-24-namit@vmware.com Signed-off-by: Ingo Molnar --- kernel/trace/bpf_trace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d64c00afceb5d..94b0e37d90ef7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -14,6 +14,8 @@ #include #include +#include + #include "trace_probe.h" #include "trace.h" @@ -163,6 +165,10 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, * access_ok() should prevent writing to non-user memory, but in * some situations (nommu, temporary switch, etc) access_ok() does * not provide enough validation, hence the check on KERNEL_DS. + * + * nmi_uaccess_okay() ensures the probe is not run in an interim + * state, when the task or mm are switched. This is specifically + * required to prevent the use of temporary mm. */ if (unlikely(in_interrupt() || @@ -170,6 +176,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, return -EPERM; if (unlikely(uaccess_kernel())) return -EPERM; + if (unlikely(!nmi_uaccess_okay())) + return -EPERM; if (!access_ok(unsafe_ptr, size)) return -EPERM; -- GitLab From 1fd8de46d01d95f875c12684a6a03559831e8b4c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:22 -0700 Subject: [PATCH 1727/1861] x86/jump_label: Use text_poke_early() during early init There is no apparent reason not to use text_poke_early() during early-init, since no patching of code that might be on the stack is done and only a single core is running. This is required for the next patches that would set a temporary mm for text poking, and this mm is only initialized after some static-keys are enabled/disabled. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-3-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/jump_label.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index f99bd26bd3f11..e7d8c636b2288 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -50,7 +50,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry, jmp.offset = jump_entry_target(entry) - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); - if (early_boot_irqs_disabled) + /* + * As long as only a single processor is running and the code is still + * not marked as RO, text_poke_early() can be used; Checking that + * system_state is SYSTEM_BOOTING guarantees it.
+ */ + if (system_state == SYSTEM_BOOTING) poker = text_poke_early; if (type == JUMP_LABEL_JMP) { -- GitLab From cefa929c034eb5d9c15c50088235a0093a219687 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Apr 2019 17:11:23 -0700 Subject: [PATCH 1728/1861] x86/mm: Introduce temporary mm structs Using a dedicated page-table for temporary PTEs prevents other cores from using - even speculatively - these PTEs, thereby providing two benefits: (1) Security hardening: an attacker that gains kernel memory writing abilities cannot easily overwrite sensitive data. (2) Avoiding TLB shootdowns: the PTEs do not need to be flushed in remote page-tables. To do so a temporary mm_struct can be used. Mappings which are private for this mm can be set in the userspace part of the address-space. During the whole time in which the temporary mm is loaded, interrupts must be disabled. The first use-case for temporary mm struct, which will follow, is for poking the kernel text. [ Commit message was written by Nadav Amit ] Tested-by: Masami Hiramatsu Signed-off-by: Andy Lutomirski Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-4-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 19d18fae6ec66..24dc3b8109701 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -356,4 +356,37 @@ static inline unsigned long __get_current_cr3_fast(void) return cr3; } +typedef struct { + struct mm_struct *mm; +} temp_mm_state_t; + +/* + * Using a temporary mm allows to set temporary mappings that are not accessible + * by other CPUs. Such mappings are needed to perform sensitive memory writes + * that override the kernel memory protections (e.g., W^X), without exposing the + * temporary page-table mappings that are required for these write operations to + * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the + * mapping is torn down. + * + * Context: The temporary mm needs to be used exclusively by a single core. To + * harden security IRQs must be disabled while the temporary mm is + * loaded, thereby preventing interrupt handler bugs from overriding + * the kernel memory protection. + */ +static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) +{ + temp_mm_state_t temp_state; + + lockdep_assert_irqs_disabled(); + temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); + switch_mm_irqs_off(NULL, mm, current); + return temp_state; +} + +static inline void unuse_temporary_mm(temp_mm_state_t prev_state) +{ + lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(NULL, prev_state.mm, current); +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ -- GitLab From d97080ebed7811a53c931032a284166ee46b9565 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:24 -0700 Subject: [PATCH 1729/1861] x86/mm: Save debug registers when loading a temporary mm Prevent user watchpoints from mistakenly firing while the temporary mm is being used. As the addresses of the temporary mm might overlap those of the user-process, this is necessary to prevent wrong signals or worse things from happening.
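For orientation, a minimal sketch of the calling pattern these helpers are designed for (the PTE setup that the actual text-poking patch performs is elided; poking_mm and poking_addr are introduced by later patches in this series):

/* Sketch: write one byte through a mapping that exists only in a
 * temporary mm. IRQs are disabled across the whole window; with this
 * patch, use_temporary_mm() also parks user hw-breakpoints and
 * unuse_temporary_mm() restores them. */
static void write_via_temporary_mm_sketch(struct mm_struct *poking_mm,
					  void *poking_addr, u8 val)
{
	temp_mm_state_t prev;
	unsigned long flags;

	local_irq_save(flags);
	prev = use_temporary_mm(poking_mm);	/* switch mm, save debug regs */

	*(u8 *)poking_addr = val;		/* visible only through this mm */

	unuse_temporary_mm(prev);		/* switch back, restore breakpoints */
	local_irq_restore(flags);
}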
Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-5-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 23 +++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 24dc3b8109701..93dff19633374 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -13,6 +13,7 @@ #include #include #include +#include extern atomic64_t last_mm_ctx_id; @@ -380,6 +381,21 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) lockdep_assert_irqs_disabled(); temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); switch_mm_irqs_off(NULL, mm, current); + + /* + * If breakpoints are enabled, disable them while the temporary mm is + * used. Userspace might set up watchpoints on addresses that are used + * in the temporary mm, which would lead to wrong signals being sent or + * crashes. + * + * Note that breakpoints are not disabled selectively, which also causes + * kernel breakpoints (e.g., perf's) to be disabled. This might be + * undesirable, but still seems reasonable as the code that runs in the + * temporary mm should be short. + */ + if (hw_breakpoint_active()) + hw_breakpoint_disable(); + return temp_state; } @@ -387,6 +403,13 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state) { lockdep_assert_irqs_disabled(); switch_mm_irqs_off(NULL, prev_state.mm, current); + + /* + * Restore the breakpoints if they were disabled before the temporary mm + * was loaded. + */ + if (hw_breakpoint_active()) + hw_breakpoint_restore(); } #endif /* _ASM_X86_MMU_CONTEXT_H */ -- GitLab From aad42dd44db086c79ca3f470ad563d2ac4ac218d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 26 Apr 2019 16:22:44 -0700 Subject: [PATCH 1730/1861] uprobes: Initialize uprobes earlier In order to have a separate address space for text poking, we need to duplicate init_mm early during start_kernel(). This, however, introduces a problem since uprobes functions are called from dup_mmap(), but uprobes is still not initialized in this early stage. Since uprobes initialization is necessary for fork, and since all the dependent initialization has been done when fork is initialized (percpu and vmalloc), move uprobes initialization to fork_init(). It does not seem uprobes introduces any security problem for the poking_mm. Crash and burn if uprobes initialization fails, similarly to other early initializations. Change the init_uprobes() name to uprobes_init() to match the naming convention of other early initialization functions. Reported-by: kernel test robot Signed-off-by: Nadav Amit Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Rick Edgecombe Cc: Rik van Riel Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: ard.biesheuvel@linaro.org Cc: deneen.t.dock@intel.com Cc: kernel-hardening@lists.openwall.com Cc: kristen@linux.intel.com Cc: linux_dti@icloud.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190426232303.28381-6-nadav.amit@gmail.com Signed-off-by: Ingo Molnar --- include/linux/uprobes.h | 5 +++++ kernel/events/uprobes.c | 8 +++----- kernel/fork.c | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 103a48a48872c..12bf0b68ed926 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -115,6 +115,7 @@ struct uprobes_state { struct xol_area *xol_area; }; +extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool is_swbp_insn(uprobe_opcode_t *insn); @@ -154,6 +155,10 @@ extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, struct uprobes_state { }; +static inline void uprobes_init(void) +{ +} + #define uprobe_get_trap_addr(regs) instruction_pointer(regs) static inline int diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c5cde87329c7c..e6a0d6be87e33 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2294,16 +2294,14 @@ static struct notifier_block uprobe_exception_nb = { .priority = INT_MAX-1, /* notified after kprobes, kgdb */ }; -static int __init init_uprobes(void) +void __init uprobes_init(void) { int i; for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); - if (percpu_init_rwsem(&dup_mmap_sem)) - return -ENOMEM; + BUG_ON(percpu_init_rwsem(&dup_mmap_sem)); - return register_die_notifier(&uprobe_exception_nb); + BUG_ON(register_die_notifier(&uprobe_exception_nb)); } -__initcall(init_uprobes); diff --git a/kernel/fork.c b/kernel/fork.c index 9dcd18aa210b5..44fba5e5e9169 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -815,6 +815,7 @@ void __init fork_init(void) #endif lockdep_init_task(&init_task); + uprobes_init(); } int __weak arch_dup_task_struct(struct task_struct *dst, -- GitLab From 13585fa0668c724efab9635aaeef6ec390217415 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:25 -0700 Subject: [PATCH 1731/1861] fork: Provide a function for copying init_mm Provide a function for copying init_mm. This function will be later used for setting a temporary mm. Tested-by: Masami Hiramatsu Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-6-namit@vmware.com Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 1 + kernel/fork.c | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2e97a2227045e..f1227f2c38a4c 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -76,6 +76,7 @@ extern void exit_itimers(struct signal_struct *); extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); +struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); diff --git a/kernel/fork.c b/kernel/fork.c index 44fba5e5e9169..fbe9dfcd8680e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1299,13 +1299,20 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) complete_vfork_done(tsk); } -/* - * Allocate a new mm structure and copy contents from the - * mm structure of the passed in task structure. +/** + * dup_mm() - duplicates an existing mm structure + * @tsk: the task_struct with which the new mm will be associated. + * @oldmm: the mm to duplicate. + * + * Allocates a new mm structure and duplicates the provided @oldmm structure + * content into it. + * + * Return: the duplicated mm or NULL on failure. */ -static struct mm_struct *dup_mm(struct task_struct *tsk) +static struct mm_struct *dup_mm(struct task_struct *tsk, + struct mm_struct *oldmm) { - struct mm_struct *mm, *oldmm = current->mm; + struct mm_struct *mm; int err; mm = allocate_mm(); @@ -1372,7 +1379,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) } retval = -ENOMEM; - mm = dup_mm(tsk); + mm = dup_mm(tsk, current->mm); if (!mm) goto fail_nomem; @@ -2187,6 +2194,11 @@ struct task_struct *fork_idle(int cpu) return task; } +struct mm_struct *copy_init_mm(void) +{ + return dup_mm(NULL, &init_mm); +} + /* * Ok, this is the main fork-routine. * -- GitLab From 4fc19708b165c1c152fa1f12f6600e66184b7786 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 26 Apr 2019 16:22:46 -0700 Subject: [PATCH 1732/1861] x86/alternatives: Initialize temporary mm for patching To prevent improper use of the PTEs that are used for text patching, the next patches will use a temporary mm struct. Initialize it by copying the init mm. The address that will be used for patching is taken from the lower area that is usually used for the task memory. Doing so prevents the need to frequently synchronize the temporary-mm (e.g., when BPF programs are installed), since different PGDs are used for the task memory. Finally, randomize the address of the PTEs to harden against exploits that use these PTEs. Suggested-by: Andy Lutomirski Tested-by: Masami Hiramatsu Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: ard.biesheuvel@linaro.org Cc: deneen.t.dock@intel.com Cc: kernel-hardening@lists.openwall.com Cc: kristen@linux.intel.com Cc: linux_dti@icloud.com Cc: will.deacon@arm.com Link: https://lkml.kernel.org/r/20190426232303.28381-8-nadav.amit@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 3 +++ arch/x86/include/asm/text-patching.h | 2 ++ arch/x86/kernel/alternative.c | 3 +++ arch/x86/mm/init.c | 37 ++++++++++++++++++++++++++++ init/main.c | 3 +++ 5 files changed, 48 insertions(+) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2779ace16d23f..702db59047536 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1021,6 +1021,9 @@ static inline void __meminit init_trampoline_default(void) /* Default trampoline pgd value */ trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; } + +void __init poking_init(void); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index f8fc8e86cf01b..a75eed841eedc 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,5 +39,7 @@ extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +extern __ro_after_init struct mm_struct *poking_mm; +extern __ro_after_init unsigned long poking_addr; #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 0a814d73547a1..11d5c710a94fd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -679,6 +679,9 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, return addr; } +__ro_after_init struct mm_struct *poking_mm; +__ro_after_init unsigned long poking_addr; + static void *__text_poke(void *addr, const void *opcode, size_t len) { unsigned long flags; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8dacdb96899ec..fd10d91a61152 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -701,6 +703,41 @@ void __init init_mem_mapping(void) early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } +/* + * Initialize an mm_struct to be used during poking and a pointer to be used + * during patching. + */ +void __init poking_init(void) +{ + spinlock_t *ptl; + pte_t *ptep; + + poking_mm = copy_init_mm(); + BUG_ON(!poking_mm); + + /* + * Randomize the poking address, but make sure that the following page + * will be mapped at the same PMD. We need 2 pages, so find space for 3, + * and adjust the address if the PMD ends after the first one. + */ + poking_addr = TASK_UNMAPPED_BASE; + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) % + (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE); + + if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0) + poking_addr += PAGE_SIZE; + + /* + * We need to trigger the allocation of the page-tables that will be + * needed for poking now. 
Later, poking may be performed in an atomic + * section, which might cause allocation to fail. + */ + ptep = get_locked_pte(poking_mm, poking_addr, &ptl); + BUG_ON(!ptep); + pte_unmap_unlock(ptep, ptl); +} + /* * devmem_is_allowed() checks to see if /dev/mem access to a certain address * is valid. The argument is a physical page number. diff --git a/init/main.c b/init/main.c index 7d4025d665eb9..95dd9406ee31e 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,8 @@ void __init __weak thread_stack_cache_init(void) void __init __weak mem_encrypt_init(void) { } +void __init __weak poking_init(void) { } + bool initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); @@ -737,6 +739,7 @@ asmlinkage __visible void __init start_kernel(void) taskstats_init_early(); delayacct_init(); + poking_init(); check_bugs(); acpi_subsystem_init(); -- GitLab From b3fd8e83ada0d51b71a84297480187e2d40e5ded Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:27 -0700 Subject: [PATCH 1733/1861] x86/alternatives: Use temporary mm for text poking text_poke() can potentially compromise security as it sets temporary PTEs in the fixmap. These PTEs might be used to rewrite the kernel code from other cores accidentally or maliciously, if an attacker gains the ability to write onto kernel memory. Moreover, since remote TLBs are not flushed after the temporary PTEs are removed, the time-window in which the code is writable is not limited if the fixmap PTEs - maliciously or accidentally - are cached in the TLB. To address these potential security hazards, use a temporary mm for patching the code. Finally, text_poke() is also not conservative enough when mapping pages, as it always tries to map 2 pages, even when a single one is sufficient. So try to be more conservative, and do not map more than needed. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-8-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fixmap.h | 2 - arch/x86/kernel/alternative.c | 108 +++++++++++++++++++++++++++------- arch/x86/xen/mmu_pv.c | 2 - 3 files changed, 86 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 50ba74a34a37c..9da8cccdf3fb5 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -103,8 +103,6 @@ enum fixed_addresses { #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif - FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ - FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ #ifdef CONFIG_X86_INTEL_MID FIX_LNW_VRTC, #endif diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 11d5c710a94fd..599203876c32f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -684,41 +685,104 @@ __ro_after_init unsigned long poking_addr; static void *__text_poke(void *addr, const void *opcode, size_t len) { + bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE; + struct page *pages[2] = {NULL}; + temp_mm_state_t prev; unsigned long flags; - char *vaddr; - struct page *pages[2]; - int i; + pte_t pte, *ptep; + spinlock_t *ptl; + pgprot_t pgprot; /* - * While boot memory allocator is runnig we cannot use struct - * pages as they are not yet initialized. + * While boot memory allocator is running we cannot use struct pages as + * they are not yet initialized. There is no way to recover. */ BUG_ON(!after_bootmem); if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); - pages[1] = vmalloc_to_page(addr + PAGE_SIZE); + if (cross_page_boundary) + pages[1] = vmalloc_to_page(addr + PAGE_SIZE); } else { pages[0] = virt_to_page(addr); WARN_ON(!PageReserved(pages[0])); - pages[1] = virt_to_page(addr + PAGE_SIZE); + if (cross_page_boundary) + pages[1] = virt_to_page(addr + PAGE_SIZE); } - BUG_ON(!pages[0]); + /* + * If something went wrong, crash and burn since recovery paths are not + * implemented. + */ + BUG_ON(!pages[0] || (cross_page_boundary && !pages[1])); + local_irq_save(flags); - set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); - if (pages[1]) - set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); - vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - clear_fixmap(FIX_TEXT_POKE0); - if (pages[1]) - clear_fixmap(FIX_TEXT_POKE1); - local_flush_tlb(); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ - for (i = 0; i < len; i++) - BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); + + /* + * Map the page without the global bit, as TLB flushing is done with + * flush_tlb_mm_range(), which is intended for non-global PTEs. + */ + pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL); + + /* + * The lock is not really needed, but this allows to avoid open-coding. + */ + ptep = get_locked_pte(poking_mm, poking_addr, &ptl); + + /* + * This must not fail; preallocated in poking_init(). 
+ */ + VM_BUG_ON(!ptep); + + pte = mk_pte(pages[0], pgprot); + set_pte_at(poking_mm, poking_addr, ptep, pte); + + if (cross_page_boundary) { + pte = mk_pte(pages[1], pgprot); + set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte); + } + + /* + * Loading the temporary mm behaves as a compiler barrier, which + * guarantees that the PTE will be set at the time memcpy() is done. + */ + prev = use_temporary_mm(poking_mm); + + kasan_disable_current(); + memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len); + kasan_enable_current(); + + /* + * Ensure that the PTE is only cleared after the instructions of memcpy + * were issued by using a compiler barrier. + */ + barrier(); + + pte_clear(poking_mm, poking_addr, ptep); + if (cross_page_boundary) + pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1); + + /* + * Loading the previous page-table hierarchy requires a serializing + * instruction that already allows the core to see the updated version. + * Xen-PV is assumed to serialize execution in a similar manner. + */ + unuse_temporary_mm(prev); + + /* + * Flushing the TLB might involve IPIs, which would require enabled + * IRQs, but not if the mm is not used, as it is in this point. + */ + flush_tlb_mm_range(poking_mm, poking_addr, poking_addr + + (cross_page_boundary ? 2 : 1) * PAGE_SIZE, + PAGE_SHIFT, false); + + /* + * If the text does not match what we just wrote then something is + * fundamentally screwy; there's nothing we can really do about that. + */ + BUG_ON(memcmp(addr, opcode, len)); + + pte_unmap_unlock(ptep, ptl); local_irq_restore(flags); return addr; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index a21e1734fc1f0..beb44e22afdf7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2318,8 +2318,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #elif defined(CONFIG_X86_VSYSCALL_EMULATION) case VSYSCALL_PAGE: #endif - case FIX_TEXT_POKE0: - case FIX_TEXT_POKE1: /* All local page mappings */ pte = pfn_pte(phys, prot); break; -- GitLab From 86a22057127d1c0462a18901421bf1ff89491392 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:28 -0700 Subject: [PATCH 1734/1861] x86/kgdb: Avoid redundant comparison of patched code text_poke() already ensures that the written value is the correct one and fails if that is not the case. There is no need for an additional comparison. Remove it. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-9-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 2b203ee5b8796..13b13311b7925 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -747,7 +747,6 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - char opc[BREAK_INSTR_SIZE]; bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, @@ -766,11 +765,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return -EBUSY; text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); - err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); - if (err) - return err; - if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) - return -EINVAL; bpt->type = BP_POKE_BREAKPOINT; return err; @@ -778,9 +772,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - int err; - char opc[BREAK_INSTR_SIZE]; - if (bpt->type != BP_POKE_BREAKPOINT) goto knl_write; /* @@ -791,10 +782,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) goto knl_write; text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); - err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); - if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) - goto knl_write; - return err; + return 0; knl_write: return probe_kernel_write((char *)bpt->bpt_addr, -- GitLab From 3c0dab44e22782359a0a706cbce72de99a22aa75 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:29 -0700 Subject: [PATCH 1735/1861] x86/ftrace: Set trampoline pages as executable Since module_alloc() will not set the pages as executable soon, set ftrace trampoline pages as executable after they are allocated. For the time being, do not change ftrace to use the text_poke() interface. As a result, ftrace still breaks W^X. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-10-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ef49517f6bb24..53ba1aa3a01f8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -730,6 +730,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long end_offset; unsigned long op_offset; unsigned long offset; + unsigned long npages; unsigned long size; unsigned long retq; unsigned long *ptr; @@ -762,6 +763,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) return 0; *tramp_size = size + RET_SIZE + sizeof(void *); + npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ ret = probe_kernel_read(trampoline, (void *)start_offset, size); @@ -806,6 +808,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; + /* + * Module allocation needs to be completed by making the page + * executable. The page is still writable, which is a security hazard, + * but anyhow ftrace breaks W^X completely. + */ + set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: tramp_free(trampoline, *tramp_size); -- GitLab From 7298e24f904224fa79eb8fd7e0fbd78950ccf2db Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:30 -0700 Subject: [PATCH 1736/1861] x86/kprobes: Set instruction page as executable Set the page as executable after allocation. This patch is a preparatory patch for a following patch that makes module allocated pages non-executable. While at it, do some small cleanup of what appears to be unnecessary masking. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-11-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index fed46ddb1eef2..06058c44ab57b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -431,8 +431,20 @@ void *alloc_insn_page(void) void *page; page = module_alloc(PAGE_SIZE); - if (page) - set_memory_ro((unsigned long)page & PAGE_MASK, 1); + if (!page) + return NULL; + + /* + * First make the page read-only, and only then make it executable to + * prevent it from being W+X in between. + */ + set_memory_ro((unsigned long)page, 1); + + /* + * TODO: Once additional kernel code protection mechanisms are set, ensure + * that the page was not maliciously altered and it is still zeroed. + */ + set_memory_x((unsigned long)page, 1); return page; } @@ -440,8 +452,12 @@ void *alloc_insn_page(void) /* Recover page to RW mode before releasing it */ void free_insn_page(void *page) { - set_memory_nx((unsigned long)page & PAGE_MASK, 1); - set_memory_rw((unsigned long)page & PAGE_MASK, 1); + /* + * First make the page non-executable, and only then make it writable to + * prevent it from being W+X in between.
+ */ + set_memory_nx((unsigned long)page, 1); + set_memory_rw((unsigned long)page, 1); module_memfree(page); } -- GitLab From f2c65fb3221adc6b73b0549fc7ba892022db9797 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:31 -0700 Subject: [PATCH 1737/1861] x86/modules: Avoid breaking W^X while loading modules When modules and BPF filters are loaded, there is a time window in which some memory is both writable and executable. An attacker that has already found another vulnerability (e.g., a dangling pointer) might be able to exploit this behavior to overwrite kernel code. Prevent having writable executable PTEs in this stage. In addition, avoiding having W+X mappings can also slightly simplify the patching of modules code on initialization (e.g., by alternatives and static-key), as would be done in the next patch. This was actually the main motivation for this patch. To avoid having W+X mappings, set them initially as RW (NX) and after they are set as RO set them as X as well. Setting them as executable is done as a separate step to avoid one core in which the old PTE is cached (hence writable), and another which sees the updated PTE (executable), which would break the W^X protection. Suggested-by: Thomas Gleixner Suggested-by: Andy Lutomirski Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Jessica Yu Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Rik van Riel Link: https://lkml.kernel.org/r/20190426001143.4983-12-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 28 +++++++++++++++++++++------- arch/x86/kernel/module.c | 2 +- include/linux/filter.h | 1 + kernel/module.c | 5 +++++ 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 599203876c32f..3d2b6b6fb20c4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -668,15 +668,29 @@ void __init alternative_instructions(void) * handlers seeing an inconsistent instruction while you patch. */ void *__init_or_module text_poke_early(void *addr, const void *opcode, - size_t len) + size_t len) { unsigned long flags; - local_irq_save(flags); - memcpy(addr, opcode, len); - local_irq_restore(flags); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ + + if (boot_cpu_has(X86_FEATURE_NX) && + is_module_text_address((unsigned long)addr)) { + /* + * Modules text is marked initially as non-executable, so the + * code cannot be running and speculative code-fetches are + * prevented. Just change the code. + */ + memcpy(addr, opcode, len); + } else { + local_irq_save(flags); + memcpy(addr, opcode, len); + local_irq_restore(flags); + sync_core(); + + /* + * Could also do a CLFLUSH here to speed up CPU recovery; but + * that causes hangs on some VIA CPUs. 
+ */ + } return addr; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b052e883dd8cc..cfa3106faee42 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -87,7 +87,7 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); diff --git a/include/linux/filter.h b/include/linux/filter.h index 6074aa064b540..14ec3bdad9a90 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -746,6 +746,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_memory_ro((unsigned long)hdr, hdr->pages); + set_memory_x((unsigned long)hdr, hdr->pages); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) diff --git a/kernel/module.c b/kernel/module.c index 0b9aa8ab89f08..2b2845ae983ed 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1950,8 +1950,13 @@ void module_enable_ro(const struct module *mod, bool after_init) return; frob_text(&mod->core_layout, set_memory_ro); + frob_text(&mod->core_layout, set_memory_x); + frob_rodata(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_x); + frob_rodata(&mod->init_layout, set_memory_ro); if (after_init) -- GitLab From bb0a008d6a2c543efc11313b448d2f26f91dc4f8 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:32 -0700 Subject: [PATCH 1738/1861] x86/jump-label: Remove support for custom text poker There are only two types of text poking: early and breakpoint based. The use of a function pointer to perform text poking complicates the code and is probably inefficient due to the use of indirect branches. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-13-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/jump_label.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e7d8c636b2288..e631c358f7f4a 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -37,7 +37,6 @@ static void bug_at(unsigned char *ip, int line) static void __ref __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, - void *(*poker)(void *, const void *, size_t), int init) { union jump_code_union jmp; @@ -50,14 +49,6 @@ static void __ref __jump_label_transform(struct jump_entry *entry, jmp.offset = jump_entry_target(entry) - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); - /* - * As long as only a single processor is running and the code is still - * not marked as RO, text_poke_early() can be used; Checking that - * system_state is SYSTEM_BOOTING guarantees it. 
- */ - if (system_state == SYSTEM_BOOTING) - poker = text_poke_early; - if (type == JUMP_LABEL_JMP) { if (init) { expect = default_nop; line = __LINE__; @@ -80,16 +71,19 @@ static void __ref __jump_label_transform(struct jump_entry *entry, bug_at((void *)jump_entry_code(entry), line); /* - * Make text_poke_bp() a default fallback poker. + * As long as only a single processor is running and the code is still + * not marked as RO, text_poke_early() can be used; Checking that + * system_state is SYSTEM_BOOTING guarantees it. It will be set to + * SYSTEM_SCHEDULING before other cores are awaken and before the + * code is write-protected. * * At the time the change is being done, just ignore whether we * are doing nop -> jump or jump -> nop transition, and assume * always nop being the 'currently valid' instruction - * */ - if (poker) { - (*poker)((void *)jump_entry_code(entry), code, - JUMP_LABEL_NOP_SIZE); + if (init || system_state == SYSTEM_BOOTING) { + text_poke_early((void *)jump_entry_code(entry), code, + JUMP_LABEL_NOP_SIZE); return; } @@ -101,7 +95,7 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { mutex_lock(&text_mutex); - __jump_label_transform(entry, type, NULL, 0); + __jump_label_transform(entry, type, 0); mutex_unlock(&text_mutex); } @@ -131,5 +125,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, jlstate = JL_STATE_NO_UPDATE; } if (jlstate == JL_STATE_UPDATE) - __jump_label_transform(entry, type, text_poke_early, 1); + __jump_label_transform(entry, type, 1); } -- GitLab From 0a203df5cf0eb709be4f190314e262b72d7e5b76 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:33 -0700 Subject: [PATCH 1739/1861] x86/alternatives: Remove the return value of text_poke_*() The return value of text_poke_early() and text_poke_bp() is useless. Remove it. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-14-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/text-patching.h | 4 ++-- arch/x86/kernel/alternative.c | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index a75eed841eedc..c90678fd391a4 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -18,7 +18,7 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif -extern void *text_poke_early(void *addr, const void *opcode, size_t len); +extern void text_poke_early(void *addr, const void *opcode, size_t len); /* * Clear and restore the kernel write-protection flag on the local CPU. 
@@ -37,7 +37,7 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); -extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3d2b6b6fb20c4..18f959975ea0c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -265,7 +265,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; -void *text_poke_early(void *addr, const void *opcode, size_t len); +void text_poke_early(void *addr, const void *opcode, size_t len); /* * Are we looking at a near JMP with a 1 or 4-byte displacement. @@ -667,8 +667,8 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -void *__init_or_module text_poke_early(void *addr, const void *opcode, - size_t len) +void __init_or_module text_poke_early(void *addr, const void *opcode, + size_t len) { unsigned long flags; @@ -691,7 +691,6 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, * that causes hangs on some VIA CPUs. */ } - return addr; } __ro_after_init struct mm_struct *poking_mm; @@ -893,7 +892,7 @@ NOKPROBE_SYMBOL(poke_int3_handler); * replacing opcode * - sync cores */ -void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) { unsigned char int3 = 0xcc; @@ -935,7 +934,5 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) * the writing of the new instruction. */ bp_patching_in_progress = false; - - return addr; } -- GitLab From d253ca0c3865a8d9a8c01143cf20425e0be4d0ce Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:34 -0700 Subject: [PATCH 1740/1861] x86/mm/cpa: Add set_direct_map_*() functions Add two new functions set_direct_map_default_noflush() and set_direct_map_invalid_noflush() for setting the direct map alias for the page to its default valid permissions and to an invalid state that cannot be cached in a TLB, respectively. These functions do not flush the TLB. Note, __kernel_map_pages() does something similar but flushes the TLB and doesn't reset the permission bits to default on all architectures. Also add an ARCH config ARCH_HAS_SET_DIRECT_MAP for specifying whether these have an actual implementation or a default empty one. Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Nadav Amit Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-15-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/Kconfig | 4 ++++ arch/x86/Kconfig | 1 + arch/x86/include/asm/set_memory.h | 3 +++ arch/x86/mm/pageattr.c | 14 +++++++++++--- include/linux/set_memory.h | 11 +++++++++++ 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 33687dddd86a7..88e5a92a9e587 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -249,6 +249,10 @@ config ARCH_HAS_FORTIFY_SOURCE config ARCH_HAS_SET_MEMORY bool +# Select if arch has all set_direct_map_invalid/default() functions +config ARCH_HAS_SET_DIRECT_MAP + bool + # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bd6f93ce06332..cee3f22ce8d19 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -65,6 +65,7 @@ config X86 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 07a25753e85c5..ae7b909dc242d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -85,6 +85,9 @@ int set_pages_nx(struct page *page, int numpages); int set_pages_ro(struct page *page, int numpages); int set_pages_rw(struct page *page, int numpages); +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4c570612e24ee..3574550192c60 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2209,8 +2209,6 @@ int set_pages_rw(struct page *page, int numpages) return set_memory_rw(addr, numpages); } -#ifdef CONFIG_DEBUG_PAGEALLOC - static int __set_pages_p(struct page *page, int numpages) { unsigned long tempaddr = (unsigned long) page_address(page); @@ -2249,6 +2247,17 @@ static int __set_pages_np(struct page *page, int numpages) return __change_page_attr_set_clr(&cpa, 0); } +int set_direct_map_invalid_noflush(struct page *page) +{ + return __set_pages_np(page, 1); +} + +int set_direct_map_default_noflush(struct page *page) +{ + return __set_pages_p(page, 1); +} + +#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) @@ -2282,7 +2291,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) } #ifdef CONFIG_HIBERNATION - bool kernel_page_present(struct page *page) { unsigned int level; diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 2a986d282a975..b5071497b8cbe 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -17,6 +17,17 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +#ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP +static inline int set_direct_map_invalid_noflush(struct page *page) +{ + return 0; +} +static inline int set_direct_map_default_noflush(struct page *page) +{ + return 0; +} +#endif + #ifndef set_mce_nospec static inline int 
set_mce_nospec(unsigned long pfn) { -- GitLab From d63326928611600ad65baff54a70f53b02b3cdfe Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:35 -0700 Subject: [PATCH 1741/1861] mm/hibernation: Make hibernation handle unmapped pages Make hibernate handle unmapped pages on the direct map when CONFIG_ARCH_HAS_SET_DIRECT_MAP=y is set. These functions allow for setting pages to invalid configurations, so now hibernate should check if the pages have valid mappings and handle them if they are unmapped when doing a hibernate save operation. Previously this checking was already done when CONFIG_DEBUG_PAGEALLOC=y was configured. It does not appear to have a big impact on hibernation performance. The speed of the saving operation before this change was measured as 819.02 MB/s, and after was measured at 813.32 MB/s. Before: [ 4.670938] PM: Wrote 171996 kbytes in 0.21 seconds (819.02 MB/s) After: [ 4.504714] PM: Wrote 178932 kbytes in 0.22 seconds (813.32 MB/s) Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Acked-by: Pavel Machek Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nadav Amit Cc: Rafael J. Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-16-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 4 ---- include/linux/mm.h | 18 ++++++------------ kernel/power/snapshot.c | 5 +++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3574550192c60..daf4d645e5370 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2257,7 +2257,6 @@ int set_direct_map_default_noflush(struct page *page) return __set_pages_p(page, 1); } -#ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) @@ -2302,11 +2301,8 @@ bool kernel_page_present(struct page *page) pte = lookup_address((unsigned long)page_address(page), &level); return (pte_val(*pte) & _PAGE_PRESENT); } - #endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC */ - int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 6b10c21630f54..083d7b4863eda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2610,37 +2610,31 @@ static inline void kernel_poison_pages(struct page *page, int numpages, int enable) { } #endif -#ifdef CONFIG_DEBUG_PAGEALLOC extern bool _debug_pagealloc_enabled; -extern void __kernel_map_pages(struct page *page, int numpages, int enable); static inline bool debug_pagealloc_enabled(void) { - return _debug_pagealloc_enabled; + return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && _debug_pagealloc_enabled; } +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) +extern void __kernel_map_pages(struct page *page, int numpages, int enable); + static inline void kernel_map_pages(struct page *page, int numpages, int enable) { - if (!debug_pagealloc_enabled()) - return; - __kernel_map_pages(page, numpages, enable); } #ifdef CONFIG_HIBERNATION extern bool kernel_page_present(struct page *page); #endif /* CONFIG_HIBERNATION */ -#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) {} #ifdef CONFIG_HIBERNATION
static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ -static inline bool debug_pagealloc_enabled(void) -{ - return false; -} -#endif /* CONFIG_DEBUG_PAGEALLOC */ +#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */ #ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f08a1e4ee1d45..bc9558ab1e5b3 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1342,8 +1342,9 @@ static inline void do_copy_page(long *dst, long *src) * safe_copy_page - Copy a page in a safe way. * * Check if the page we are going to copy is marked as present in the kernel - * page tables (this always is the case if CONFIG_DEBUG_PAGEALLOC is not set - * and in that case kernel_page_present() always returns 'true'). + * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or + * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present() + * always returns 'true'. */ static void safe_copy_page(void *dst, struct page *s_page) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c02cff1ed56eb..59661106da167 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1144,7 +1144,9 @@ static __always_inline bool free_pages_prepare(struct page *page, } arch_free_page(page, order); kernel_poison_pages(page, 1 << order, 0); - kernel_map_pages(page, 1 << order, 0); + if (debug_pagealloc_enabled()) + kernel_map_pages(page, 1 << order, 0); + kasan_free_nondeferred_pages(page, order); return true; @@ -2014,7 +2016,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_refcounted(page); arch_alloc_page(page, order); - kernel_map_pages(page, 1 << order, 1); + if (debug_pagealloc_enabled()) + kernel_map_pages(page, 1 << order, 1); kasan_alloc_pages(page, order); kernel_poison_pages(page, 1 << order, 1); set_page_owner(page, order, gfp_flags); -- GitLab From 868b104d7379e28013e9d48bdd2db25e0bdcf751 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:36 -0700 Subject: [PATCH 1742/1861] mm/vmalloc: Add flag for freeing of special permissions Add a new flag VM_FLUSH_RESET_PERMS, for enabling vfree operations to immediately clear executable TLB entries before freeing pages, and handle resetting permissions on the directmap. This flag is useful for any kind of memory with elevated permissions, or where there can be related permissions changes on the directmap. Today this is RO+X and RO memory. Although this enables directly vfreeing non-writable memory now, non-writable memory cannot be freed in an interrupt because the allocation itself is used as a node on the deferred free list. So when RO memory needs to be freed in an interrupt the code doing the vfree needs to have its own work queue, as was the case before the deferred vfree list was added to vmalloc. For architectures with set_direct_map_ implementations this whole operation can be done with one TLB flush when centralized like this. For others with directmap permissions, currently only arm64, a backup method using set_memory functions is used to reset the directmap. When arm64 adds set_direct_map_ functions, this backup can be removed. When the TLB is flushed to both remove TLB entries for the vmalloc range mapping and the direct map permissions, the lazy purge operation could be done to try to save a TLB flush later. However today vm_unmap_aliases could flush a TLB range that does not include the directmap.
So a helper is added with extra parameters that can allow both the vmalloc address and the direct mapping to be flushed during this operation. The behavior of the normal vm_unmap_aliases function is unchanged. Suggested-by: Dave Hansen Suggested-by: Andy Lutomirski Suggested-by: Will Deacon Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nadav Amit Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-17-namit@vmware.com Signed-off-by: Ingo Molnar --- include/linux/vmalloc.h | 15 ++++++ mm/vmalloc.c | 113 +++++++++++++++++++++++++++++++++------- 2 files changed, 109 insertions(+), 19 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 398e9c95cd616..c6eebb839552f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -21,6 +21,11 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ +/* + * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with + * vfree_atomic(). + */ +#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ /* bits [20..32] reserved for arch specific ioremap internals */ /* @@ -142,6 +147,13 @@ extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); +static inline void set_vm_flush_reset_perms(void *addr) +{ + struct vm_struct *vm = find_vm_area(addr); + + if (vm) + vm->flags |= VM_FLUSH_RESET_PERMS; +} #else static inline int map_kernel_range_noflush(unsigned long start, unsigned long size, @@ -157,6 +169,9 @@ static inline void unmap_kernel_range(unsigned long addr, unsigned long size) { } +static inline void set_vm_flush_reset_perms(void *addr) +{ +} #endif /* Allocate/destroy a 'vmalloc' VM area. */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e86ba6e74b50d..e5e9e1fcac014 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1059,24 +1060,9 @@ static void vb_free(const void *addr, unsigned long size) spin_unlock(&vb->lock); } -/** - * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer - * - * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily - * to amortize TLB flushing overheads. What this means is that any page you - * have now, may, in a former life, have been mapped into kernel virtual - * address by the vmap layer and so there might be some CPUs with TLB entries - * still referencing that page (additional to the regular 1:1 kernel mapping). - * - * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can - * be sure that none of the pages we have control over will have any aliases - * from the vmap layer. 
- */ -void vm_unmap_aliases(void) +static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) { - unsigned long start = ULONG_MAX, end = 0; int cpu; - int flush = 0; if (unlikely(!vmap_initialized)) return; @@ -1113,6 +1099,27 @@ void vm_unmap_aliases(void) flush_tlb_kernel_range(start, end); mutex_unlock(&vmap_purge_lock); } + +/** + * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer + * + * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily + * to amortize TLB flushing overheads. What this means is that any page you + * have now, may, in a former life, have been mapped into kernel virtual + * address by the vmap layer and so there might be some CPUs with TLB entries + * still referencing that page (additional to the regular 1:1 kernel mapping). + * + * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can + * be sure that none of the pages we have control over will have any aliases + * from the vmap layer. + */ +void vm_unmap_aliases(void) +{ + unsigned long start = ULONG_MAX, end = 0; + int flush = 0; + + _vm_unmap_aliases(start, end, flush); +} EXPORT_SYMBOL_GPL(vm_unmap_aliases); /** @@ -1505,6 +1512,72 @@ struct vm_struct *remove_vm_area(const void *addr) return NULL; } +static inline void set_area_direct_map(const struct vm_struct *area, + int (*set_direct_map)(struct page *page)) +{ + int i; + + for (i = 0; i < area->nr_pages; i++) + if (page_address(area->pages[i])) + set_direct_map(area->pages[i]); +} + +/* Handle removing and resetting vm mappings related to the vm_struct. */ +static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) +{ + unsigned long addr = (unsigned long)area->addr; + unsigned long start = ULONG_MAX, end = 0; + int flush_reset = area->flags & VM_FLUSH_RESET_PERMS; + int i; + + /* + * The below block can be removed when all architectures that have + * direct map permissions also have set_direct_map_() implementations. + * This is concerned with resetting the direct map of any vm alias with + * execute permissions, without leaving a RW+X window. + */ + if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { + set_memory_nx(addr, area->nr_pages); + set_memory_rw(addr, area->nr_pages); + } + + remove_vm_area(area->addr); + + /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */ + if (!flush_reset) + return; + + /* + * If not deallocating pages, just do the flush of the VM area and + * return. + */ + if (!deallocate_pages) { + vm_unmap_aliases(); + return; + } + + /* + * If execution gets here, flush the vm mapping and reset the direct + * map. Find the start and end range of the direct mappings to make sure + * the vm_unmap_aliases() flush includes the direct map. + */ + for (i = 0; i < area->nr_pages; i++) { + if (page_address(area->pages[i])) { + start = min((unsigned long)page_address(area->pages[i]), start); + end = max((unsigned long)page_address(area->pages[i]) + PAGE_SIZE, end); + } + } + + /* + * Set direct map to something invalid so that it won't be cached if + * there are any accesses after the TLB flush, then flush the TLB and + * reset the direct map permissions to the default.
+ */ + set_area_direct_map(area, set_direct_map_invalid_noflush); + _vm_unmap_aliases(start, end, 1); + set_area_direct_map(area, set_direct_map_default_noflush); +} + static void __vunmap(const void *addr, int deallocate_pages) { struct vm_struct *area; @@ -1526,7 +1599,8 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); - remove_vm_area(addr); + vm_remove_mappings(area, deallocate_pages); + if (deallocate_pages) { int i; @@ -1961,8 +2035,9 @@ EXPORT_SYMBOL(vzalloc_node); */ void *vmalloc_exec(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, - NUMA_NO_NODE, __builtin_return_address(0)); + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) -- GitLab From 1a7b7d9220819afe79d1ec5d759fe4349bd2453e Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:37 -0700 Subject: [PATCH 1743/1861] modules: Use vmalloc special flag Use new flag for handling freeing of special permissioned memory in vmalloc and remove places where memory was set RW before freeing which is no longer needed. Since freeing of VM_FLUSH_RESET_PERMS memory is not supported in an interrupt by vmalloc, the freeing of init sections is moved to a work queue. Instead of call_rcu it now uses synchronize_rcu() in the work queue. Lastly, there is now a WARN_ON in module_memfree since it should not be called in an interrupt with special memory as is required for VM_FLUSH_RESET_PERMS. Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Jessica Yu Cc: Linus Torvalds Cc: Nadav Amit Cc: Rik van Riel Cc: Steven Rostedt Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-18-namit@vmware.com Signed-off-by: Ingo Molnar --- kernel/module.c | 77 +++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 2b2845ae983ed..a9020bdd4cf6b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -98,6 +98,10 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +/* Work queue for freeing init sections in success case */ +static struct work_struct init_free_wq; +static struct llist_head init_free_list; + #ifdef CONFIG_MODULES_TREE_LOOKUP /* @@ -1949,6 +1953,8 @@ void module_enable_ro(const struct module *mod, bool after_init) if (!rodata_enabled) return; + set_vm_flush_reset_perms(mod->core_layout.base); + set_vm_flush_reset_perms(mod->init_layout.base); frob_text(&mod->core_layout, set_memory_ro); frob_text(&mod->core_layout, set_memory_x); @@ -1972,15 +1978,6 @@ static void module_enable_nx(const struct module *mod) frob_writable_data(&mod->init_layout, set_memory_nx); } -static void module_disable_nx(const struct module *mod) -{ - frob_rodata(&mod->core_layout, set_memory_x); - frob_ro_after_init(&mod->core_layout, set_memory_x); - frob_writable_data(&mod->core_layout, set_memory_x); - frob_rodata(&mod->init_layout, set_memory_x); - frob_writable_data(&mod->init_layout, set_memory_x); -} - /* Iterate through all modules and set each module's text as RW */ void set_all_modules_text_rw(void) { @@ -2024,23 +2021,8 @@ void set_all_modules_text_ro(void) } mutex_unlock(&module_mutex); } - -static void disable_ro_nx(const struct module_layout *layout) -{ - if (rodata_enabled) { - frob_text(layout, set_memory_rw); - frob_rodata(layout, set_memory_rw); - frob_ro_after_init(layout, set_memory_rw); - } - frob_rodata(layout, set_memory_x); - frob_ro_after_init(layout, set_memory_x); - frob_writable_data(layout, set_memory_x); -} - #else -static void disable_ro_nx(const struct module_layout *layout) { } static void module_enable_nx(const struct module *mod) { } -static void module_disable_nx(const struct module *mod) { } #endif #ifdef CONFIG_LIVEPATCH @@ -2120,6 +2102,11 @@ static void free_module_elf(struct module *mod) void __weak module_memfree(void *module_region) { + /* + * This memory may be RO, and freeing RO memory in an interrupt is not + * supported by vmalloc. 
+ */ + WARN_ON(in_interrupt()); vfree(module_region); } @@ -2171,7 +2158,6 @@ static void free_module(struct module *mod) mutex_unlock(&module_mutex); /* This may be empty, but that's OK */ - disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); kfree(mod->args); @@ -2181,7 +2167,6 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); /* Finally, free the core (containing the module structure) */ - disable_ro_nx(&mod->core_layout); module_memfree(mod->core_layout.base); } @@ -3420,17 +3405,34 @@ static void do_mod_ctors(struct module *mod) /* For freeing module_init on success, in case kallsyms traversing */ struct mod_initfree { - struct rcu_head rcu; + struct llist_node node; void *module_init; }; -static void do_free_init(struct rcu_head *head) +static void do_free_init(struct work_struct *w) { - struct mod_initfree *m = container_of(head, struct mod_initfree, rcu); - module_memfree(m->module_init); - kfree(m); + struct llist_node *pos, *n, *list; + struct mod_initfree *initfree; + + list = llist_del_all(&init_free_list); + + synchronize_rcu(); + + llist_for_each_safe(pos, n, list) { + initfree = container_of(pos, struct mod_initfree, node); + module_memfree(initfree->module_init); + kfree(initfree); + } } +static int __init modules_wq_init(void) +{ + INIT_WORK(&init_free_wq, do_free_init); + init_llist_head(&init_free_list); + return 0; +} +module_init(modules_wq_init); + /* * This is where the real work happens. * @@ -3507,7 +3509,6 @@ static noinline int do_init_module(struct module *mod) #endif module_enable_ro(mod, true); mod_tree_remove_init(mod); - disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); mod->init_layout.base = NULL; mod->init_layout.size = 0; @@ -3518,14 +3519,18 @@ static noinline int do_init_module(struct module *mod) * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success - * path, so use actual RCU here. + * path. module_memfree() cannot be called in an interrupt, so do the + * work and call synchronize_rcu() in a work queue. + * * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work - ie * rcu_barrier() */ - call_rcu(&freeinit->rcu, do_free_init); + if (llist_add(&freeinit->node, &init_free_list)) + schedule_work(&init_free_wq); + mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3822,10 +3827,6 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); - /* we can't deallocate the module until we clear memory protection */ - module_disable_ro(mod); - module_disable_nx(mod); - ddebug_cleanup: ftrace_release_mod(mod); dynamic_debug_remove(mod, info->debug); -- GitLab From d53d2f78ceadba081fc7785570798c3c8d50a718 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:38 -0700 Subject: [PATCH 1744/1861] bpf: Use vmalloc special flag Use new flag VM_FLUSH_RESET_PERMS for handling freeing of special permissioned memory in vmalloc and remove places where memory was set RW before freeing which is no longer needed. Don't track if the memory is RO anymore because it is now tracked in vmalloc. 
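As a rough illustration of the resulting contract (a condensed sketch assembled from the changes in this series, not a verbatim quote of any one hunk; buf, size and npages are illustrative names), a user of executable vmalloc memory now does:

    void *buf = module_alloc(size);            /* now RW and non-executable on x86 */
    int npages = DIV_ROUND_UP(size, PAGE_SIZE);

    /* ... write the code into buf while it is still writable ... */

    set_vm_flush_reset_perms(buf);             /* tag the area VM_FLUSH_RESET_PERMS */
    set_memory_ro((unsigned long)buf, npages); /* read-only first ... */
    set_memory_x((unsigned long)buf, npages);  /* ... executable second, never W+X */

    /* ... and later, from non-interrupt context, simply: */
    vfree(buf);                                /* vmalloc resets the direct map and flushes the TLB */
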
Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Daniel Borkmann Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nadav Amit Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-19-namit@vmware.com Signed-off-by: Ingo Molnar --- include/linux/filter.h | 17 +++-------------- kernel/bpf/core.c | 1 - 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 14ec3bdad9a90..7d3abde3f183c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -503,7 +504,6 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ jit_requested:1,/* archs need to JIT the prog */ - undo_set_mem:1, /* Passed set_memory_ro() checkpoint */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ @@ -733,27 +733,17 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - fp->undo_set_mem = 1; + set_vm_flush_reset_perms(fp); set_memory_ro((unsigned long)fp, fp->pages); } -static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) -{ - if (fp->undo_set_mem) - set_memory_rw((unsigned long)fp, fp->pages); -} - static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { + set_vm_flush_reset_perms(hdr); set_memory_ro((unsigned long)hdr, hdr->pages); set_memory_x((unsigned long)hdr, hdr->pages); } -static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) -{ - set_memory_rw((unsigned long)hdr, hdr->pages); -} - static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) { @@ -789,7 +779,6 @@ void __bpf_prog_free(struct bpf_prog *fp); static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { - bpf_prog_unlock_ro(fp); __bpf_prog_free(fp); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ff09d32a8a1be..c605397c79f03 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -848,7 +848,6 @@ void __weak bpf_jit_free(struct bpf_prog *fp) if (fp->jited) { struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - bpf_jit_binary_unlock_ro(hdr); bpf_jit_binary_free(hdr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); -- GitLab From 7fdfe1e40b225b1d163f9afed2fa3f04442dbaad Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:39 -0700 Subject: [PATCH 1745/1861] x86/ftrace: Use vmalloc special flag Use new flag VM_FLUSH_RESET_PERMS for handling freeing of special permissioned memory in vmalloc and remove places where memory was set NX and RW before freeing which is no longer needed. Tested-by: Steven Rostedt (VMware) Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt (VMware) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Nadav Amit Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-20-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 53ba1aa3a01f8..0caf8122d6807 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -678,12 +678,8 @@ static inline void *alloc_tramp(unsigned long size) { return module_alloc(size); } -static inline void tramp_free(void *tramp, int size) +static inline void tramp_free(void *tramp) { - int npages = PAGE_ALIGN(size) >> PAGE_SHIFT; - - set_memory_nx((unsigned long)tramp, npages); - set_memory_rw((unsigned long)tramp, npages); module_memfree(tramp); } #else @@ -692,7 +688,7 @@ static inline void *alloc_tramp(unsigned long size) { return NULL; } -static inline void tramp_free(void *tramp, int size) { } +static inline void tramp_free(void *tramp) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ @@ -808,6 +804,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; + set_vm_flush_reset_perms(trampoline); + /* * Module allocation needs to be completed by making the page * executable. The page is still writable, which is a security hazard, @@ -816,7 +814,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: - tramp_free(trampoline, *tramp_size); + tramp_free(trampoline); return 0; } @@ -947,7 +945,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) return; - tramp_free((void *)ops->trampoline, ops->trampoline_size); + tramp_free((void *)ops->trampoline); ops->trampoline = 0; } -- GitLab From 241a1f22380646bc4d1dd18e5bc246877513da68 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Thu, 25 Apr 2019 17:11:40 -0700 Subject: [PATCH 1746/1861] x86/kprobes: Use vmalloc special flag Use new flag VM_FLUSH_RESET_PERMS for handling freeing of special permissioned memory in vmalloc and remove places where memory was set NX and RW before freeing which is no longer needed. Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Nadav Amit Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-21-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 06058c44ab57b..800593f4ddf75 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -434,6 +434,7 @@ void *alloc_insn_page(void) if (!page) return NULL; + set_vm_flush_reset_perms(page); /* * First make the page read-only, and only then make it executable to * prevent it from being W+X in between. @@ -452,12 +453,6 @@ void *alloc_insn_page(void) /* Recover page to RW mode before releasing it */ void free_insn_page(void *page) { - /* - * First make the page non-executable, and only then make it writable to - * prevent it from being W+X in between. 
- */ - set_memory_nx((unsigned long)page, 1); - set_memory_rw((unsigned long)page, 1); module_memfree(page); } -- GitLab From 3950746d9d8ef981c1cb842384e0e86e8d1aad76 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 25 Apr 2019 17:11:41 -0700 Subject: [PATCH 1747/1861] x86/alternatives: Add comment about module removal races Add a comment to clarify that users of text_poke() must ensure that no races with module removal take place. Signed-off-by: Nadav Amit Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190426001143.4983-22-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 18f959975ea0c..7b9b49dfc05af 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -810,6 +810,11 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) * It means the size must be writable atomically and the address must be aligned * in a way that permits an atomic write. It also makes sure we fit on a single * page. + * + * Note that the caller must ensure that if the modified code is part of a + * module, the module would not be removed during poking. This can be achieved + * by registering a module notifier, and ordering module removal and patching + * through a mutex. */ void *text_poke(void *addr, const void *opcode, size_t len) { -- GitLab From 74dd022f9e6260c3b5b8d15901d27ebcc5f21eda Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 29 Apr 2019 13:37:01 -0400 Subject: [PATCH 1748/1861] arm64: Fix compiler warning from pte_unmap() with -Wunused-but-set-variable When building with -Wunused-but-set-variable, the compiler shouts about a number of pte_unmap() users, since this expands to an empty macro on arm64: | mm/gup.c: In function 'gup_pte_range': | mm/gup.c:1727:16: warning: variable 'ptem' set but not used | [-Wunused-but-set-variable] | mm/gup.c: At top level: | mm/memory.c: In function 'copy_pte_range': | mm/memory.c:821:24: warning: variable 'orig_dst_pte' set but not used | [-Wunused-but-set-variable] | mm/memory.c:821:9: warning: variable 'orig_src_pte' set but not used | [-Wunused-but-set-variable] | mm/swap_state.c: In function 'swap_ra_info': | mm/swap_state.c:641:15: warning: variable 'orig_pte' set but not used | [-Wunused-but-set-variable] | mm/madvise.c: In function 'madvise_free_pte_range': | mm/madvise.c:318:9: warning: variable 'orig_pte' set but not used | [-Wunused-but-set-variable] Rewrite pte_unmap() as a static inline function, which silences the warnings. Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index de70c1eabf336..74ebe96937141 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -478,6 +478,8 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) return __pmd_to_phys(pmd); } +static inline void pte_unmap(pte_t *pte) { } + /* Find an entry in the third-level page table.
*/ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) @@ -486,7 +488,6 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) -- GitLab From 5fbbeedb9a8f039e0a531435c7894905c1d51e77 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 29 Apr 2019 13:37:02 -0400 Subject: [PATCH 1749/1861] arm64: mm: Remove pte_unmap_nested() As of commit ece0e2b6406a ("mm: remove pte_*map_nested()"), pte_unmap_nested() is no longer used and can be removed from the arm64 code. Signed-off-by: Qian Cai [will: also remove pte_offset_map_nested()] Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 74ebe96937141..2c41b04708fe3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -487,8 +487,6 @@ static inline void pte_unmap(pte_t *pte) { } #define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap_nested(pte) do { } while (0) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) -- GitLab From ab042b824c11502bd39abfdfd4c7f285347d483a Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 26 Apr 2019 07:33:24 +0200 Subject: [PATCH 1750/1861] Revert "drm/qxl: drop prime import/export callbacks" This reverts commit f4c34b1e2a37d5676180901fa6ff188bcb6371f8. Similar to commit a0cecc23cfcb ("Revert "drm/virtio: drop prime import/export callbacks""). We have to do the same with qxl, for the same reasons (it breaks DRI3). Drop the WARN_ON_ONCE().
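For context, stub callbacks are sufficient for DRI3 because the DRM core short-circuits same-device imports before the sg_table callbacks are ever reached. Roughly, in sketch form (a paraphrase of the drm_prime.c self-import check, not code from this patch; the helper name is made up here):

    /* Illustrative helper, not a real DRM symbol: decides which import path runs. */
    static bool prime_import_is_self_import(struct drm_device *dev,
                                            struct dma_buf *dma_buf)
    {
        if (dma_buf->ops == &drm_gem_prime_dmabuf_ops) {
            struct drm_gem_object *obj = dma_buf->priv;

            /* Exported by this very device: the core just takes a reference
             * to the existing GEM object and returns it. */
            if (obj->dev == dev)
                return true;
        }
        /* Only a genuine cross-device import would reach
         * ->gem_prime_import_sg_table(), which qxl now fails cleanly
         * with -ENOSYS instead of warning. */
        return false;
    }
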
Fixes: f4c34b1e2a37d5676 ("drm/qxl: drop prime import/export callbacks") Signed-off-by: Gerd Hoffmann Link: http://patchwork.freedesktop.org/patch/msgid/20190426053324.26443-1-kraxel@redhat.com Acked-by: Daniel Vetter --- drivers/gpu/drm/qxl/qxl_drv.c | 4 ++++ drivers/gpu/drm/qxl/qxl_prime.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 578d867a81d59..f33e349c4ec5b 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -255,10 +255,14 @@ static struct drm_driver qxl_driver = { #if defined(CONFIG_DEBUG_FS) .debugfs_init = qxl_debugfs_init, #endif + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = qxl_gem_prime_pin, .gem_prime_unpin = qxl_gem_prime_unpin, + .gem_prime_get_sg_table = qxl_gem_prime_get_sg_table, + .gem_prime_import_sg_table = qxl_gem_prime_import_sg_table, .gem_prime_vmap = qxl_gem_prime_vmap, .gem_prime_vunmap = qxl_gem_prime_vunmap, .gem_prime_mmap = qxl_gem_prime_mmap, diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c index 8b448eca1cd99..114653b471c6a 100644 --- a/drivers/gpu/drm/qxl/qxl_prime.c +++ b/drivers/gpu/drm/qxl/qxl_prime.c @@ -42,6 +42,18 @@ void qxl_gem_prime_unpin(struct drm_gem_object *obj) qxl_bo_unpin(bo); } +struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj) +{ + return ERR_PTR(-ENOSYS); +} + +struct drm_gem_object *qxl_gem_prime_import_sg_table( + struct drm_device *dev, struct dma_buf_attachment *attach, + struct sg_table *table) +{ + return ERR_PTR(-ENOSYS); +} + void *qxl_gem_prime_vmap(struct drm_gem_object *obj) { struct qxl_bo *bo = gem_to_qxl_bo(obj); -- GitLab From 3887c26c0e24d50a4d0ce20cf4726737cee1a2fd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 Apr 2019 15:10:01 +0200 Subject: [PATCH 1751/1861] ALSA: hda/realtek - Apply the fixup for ASUS Q325UAR Some ASUS models like the Q325UAR with the ALC295 codec require the same fixup that has been applied to the ALC294 codec. Just copy the entry with the pin matching to cover ALC295 too. BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1784485 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7da7fa4f42016..42cd3945e0dee 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7547,6 +7547,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC294_FIXUP_ASUS_SPK, + {0x12, 0x90a60130}, + {0x17, 0x90170110}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x04211020}), -- GitLab From 0f80cad3124f986d0e46c14d46b8da06d87a2bf4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:51 +0100 Subject: [PATCH 1752/1861] arm64: Restrict ARM64_ERRATUM_1188873 mitigation to AArch32 We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0 accesses for both instruction sets. Although nothing wrong comes out of that, people trying to squeeze the last drop of performance from buggy HW find this over the top. Oh well.
Let's change the mitigation by flipping the counter enable bit on return to userspace. Non-broken HW gets an extra branch on the fast path, which is hopefully not the end of the world. The arch timer workaround is also removed. Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 19 +++++++++++++++++-- drivers/clocksource/arm_arch_timer.c | 15 --------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c50a7a75f2e0f..1a7811b7e3c46 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -336,6 +336,21 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: +#ifdef CONFIG_ARM64_ERRATUM_1188873 +alternative_if_not ARM64_WORKAROUND_1188873 + b 4f +alternative_else_nop_endif + /* + * if (x22.mode32 == cntkctl_el1.el0vcten) + * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten + */ + mrs x1, cntkctl_el1 + eon x0, x1, x22, lsr #3 + tbz x0, #1, 4f + eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN + msr cntkctl_el1, x1 +4: +#endif apply_ssbd 0, x0, x1 .endif @@ -362,11 +377,11 @@ alternative_else_nop_endif .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 4f + bne 5f msr far_el1, x30 tramp_alias x30, tramp_exit_native br x30 -4: +5: tramp_alias x30, tramp_exit_compat br x30 #endif diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281cea..da11a9508b778 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -319,13 +319,6 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif -#ifdef CONFIG_ARM64_ERRATUM_1188873 -static u64 notrace arm64_1188873_read_cntvct_el0(void) -{ - return read_sysreg(cntvct_el0); -} -#endif - #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 /* * The low bits of the counter registers are indeterminate while bit 10 or @@ -457,14 +450,6 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_1188873 - { - .match_type = ate_match_local_cap_id, - .id = (void *)ARM64_WORKAROUND_1188873, - .desc = "ARM erratum 1188873", - .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, - }, -#endif #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 { .match_type = ate_match_dt, -- GitLab From c2b5bba3967a000764e9148e6f020d776b7ecd82 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:52 +0100 Subject: [PATCH 1753/1861] arm64: Make ARM64_ERRATUM_1188873 depend on COMPAT Since ARM64_ERRATUM_1188873 only affects AArch32 EL0, it makes some sense that it should depend on COMPAT. 
Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de1..560f2a860637b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -477,6 +477,7 @@ config ARM64_ERRATUM_1024718 config ARM64_ERRATUM_1188873 bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" default y + depends on COMPAT select ARM_ARCH_TIMER_OOL_WORKAROUND help This option adds work arounds for ARM Cortex-A76 erratum 1188873 -- GitLab From 0cf57b86859c49381addb3ce47be70aadf5fd2c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:53 +0100 Subject: [PATCH 1754/1861] arm64: Add part number for Neoverse N1 New CPU, new part number. You know the drill. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5f1437099b997..2602bae334fb7 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -89,6 +89,7 @@ #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B +#define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define APM_CPU_PART_POTENZA 0x000 @@ -118,6 +119,7 @@ #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) +#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) -- GitLab From 6989303a3b2d864fd8e17d3fa3365d3e9649a598 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:54 +0100 Subject: [PATCH 1755/1861] arm64: Apply ARM64_ERRATUM_1188873 to Neoverse-N1 Neoverse-N1 is also affected by ARM64_ERRATUM_1188873, so let's add it to the list of affected CPUs. Signed-off-by: Marc Zyngier [will: Update silicon-errata.txt] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 11 ++++++----- arch/arm64/kernel/cpu_errata.c | 13 +++++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index d1e2bb801e1bd..d5a124d7e2420 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -61,6 +61,7 @@ stable kernels. | ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 | | ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 | +| ARM | Neoverse-N1 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 560f2a860637b..fcda4e21fa8fe 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -475,16 +475,17 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. 
config ARM64_ERRATUM_1188873 - bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + bool "Cortex-A76/Neoverse-N1: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" default y depends on COMPAT select ARM_ARCH_TIMER_OOL_WORKAROUND help - This option adds work arounds for ARM Cortex-A76 erratum 1188873 + This option adds work arounds for ARM Cortex-A76/Neoverse-N1 + erratum 1188873 - Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause - register corruption when accessing the timer registers from - AArch32 userspace. + Affected Cortex-A76/Neoverse-N1 cores (r0p0, r1p0, r2p0) could + cause register corruption when accessing the timer registers + from AArch32 userspace. If unsure, say Y. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9950bb0cbd521..06f1c8aae1dcb 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -603,6 +603,16 @@ static const struct midr_range workaround_clean_cache[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 +static const struct midr_range erratum_1188873_list[] = { + /* Cortex-A76 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + /* Neoverse-N1 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 0, 2, 0), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -725,10 +735,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_1188873 { - /* Cortex-A76 r0p0 to r2p0 */ .desc = "ARM erratum 1188873", .capability = ARM64_WORKAROUND_1188873, - ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + ERRATA_MIDR_RANGE_LIST(erratum_1188873_list), }, #endif #ifdef CONFIG_ARM64_ERRATUM_1165522 -- GitLab From b13023421b5179413421333f602850914f6a7ad8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Apr 2019 08:34:08 +0100 Subject: [PATCH 1756/1861] rxrpc: Fix net namespace cleanup In rxrpc_destroy_all_calls(), there are two phases: (1) make sure the ->calls list is empty, emitting error messages if not, and (2) wait for the RCU cleanup to happen on outstanding calls (ie. ->nr_calls becomes 0). To avoid taking the call_lock, the function prechecks ->calls and if empty, it returns to avoid taking the lock - this is wrong, however: it still needs to go and do the second phase and wait for ->nr_calls to become 0. Without this, the rxrpc_net struct may get deallocated before we get to the RCU cleanup for the last calls. This can lead to: Slab corruption (Not tainted): kmalloc-16k start=ffff88802b178000, len=16384 050: 6b 6b 6b 6b 6b 6b 6b 6b 61 6b 6b 6b 6b 6b 6b 6b kkkkkkkkakkkkkkk Note the "61" at offset 0x58. This corresponds to the ->nr_calls member of struct rxrpc_net (which is >9k in size, and thus allocated out of the 16k slab). Fix this by flipping the condition on the if-statement, putting the locked section inside the if-body and dropping the return from there. The function will then always go on to wait for the RCU cleanup on outstanding calls. Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/call_object.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8aa2937b069f7..fe96881a334da 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); - if (list_empty(&rxnet->calls)) - return; + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); - write_lock(&rxnet->call_lock); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, struct rxrpc_call, link); - _debug("Zapping call %p", call); + rxrpc_see_call(call); + list_del_init(&call->link); - rxrpc_see_call(call); - list_del_init(&call->link); + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - rxrpc_call_states[call->state], - call->flags, call->events); + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } write_unlock(&rxnet->call_lock); - cond_resched(); - write_lock(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); - atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } -- GitLab From f949a12fd697479f68d99dc65e9bbab68ee49043 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Apr 2019 13:44:19 +0300 Subject: [PATCH 1757/1861] net: dsa: bcm_sf2: fix buffer overflow doing set_rxnfc The "fs->location" is a u32 that comes from the user in ethtool_set_rxnfc(). We can't pass unclamped values to test_bit() or it results in an out of bounds access beyond the end of the bitmap. Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index e6234d2097878..4212bc4a5f31a 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -886,6 +886,9 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, fs->m_ext.data[1])) return -EINVAL; + if (fs->location != RX_CLS_LOC_ANY && fs->location >= CFP_NUM_RULES) + return -EINVAL; + if (fs->location != RX_CLS_LOC_ANY && test_bit(fs->location, priv->cfp.used)) return -EBUSY; @@ -974,6 +977,9 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 loc) struct cfp_rule *rule; int ret; + if (loc >= CFP_NUM_RULES) + return -EINVAL; + /* Refuse deleting unused rules, and those that are not unique since * that could leave IPv6 rules with one of the chained rule in the * table. -- GitLab From 1f5b62f09f6b314c8d70b9de5182dae4de1f94da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:01 +0100 Subject: [PATCH 1758/1861] ARM: vdso: Remove dependency with the arch_timer driver internals The VDSO code uses the kernel helper that was originally designed to abstract the access between 32 and 64bit systems. It worked so far because this function is declared as 'inline'. As we're about to revamp that part of the code, the VDSO would break. Let's fix it by doing what should have been done from the start, a proper system register access. 
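The accessor pattern being adopted, condensed from the hunks below (same names as the patch; shown only to make the sequence explicit):

    /* CNTVCT is a 64-bit CP15 register: op1 = 1, CRm = c14 */
    #define CNTVCT  __ACCESS_CP15_64(1, c14)

    static u64 vdso_read_counter(void)      /* wrapper name assumed */
    {
        isb();              /* keep the counter read from being hoisted */
        return read_sysreg(CNTVCT);
    }

This keeps the ISB-then-read sequence of the old helper while dropping the compile-time dependency on the arch_timer driver.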
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/cp15.h | 2 ++ arch/arm/vdso/vgettimeofday.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 07e27f212dc75..d2453e2d3f1f3 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -68,6 +68,8 @@ #define BPIALL __ACCESS_CP15(c7, 0, c5, 6) #define ICIALLU __ACCESS_CP15(c7, 0, c5, 0) +#define CNTVCT __ACCESS_CP15_64(1, c14) + extern unsigned long cr_alignment; /* defined in entry-armv.S */ static inline unsigned long get_cr(void) diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index a9dd619c6c290..7bdbf5d5c47d3 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -18,9 +18,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -123,7 +123,8 @@ static notrace u64 get_ns(struct vdso_data *vdata) u64 cycle_now; u64 nsec; - cycle_now = arch_counter_get_cntvct(); + isb(); + cycle_now = read_sysreg(CNTVCT); cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask; -- GitLab From eae1ddc615bef9e1a1958a4b867a0a1678049bb3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:02 +0100 Subject: [PATCH 1759/1861] watchdog/sbsa: Use arch_timer_read_counter instead of arch_counter_get_cntvct Only arch_timer_read_counter will guarantee that workarounds are applied. So let's use this one instead of arch_counter_get_cntvct. Acked-by: Mark Rutland Reviewed-by: Guenter Roeck Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/watchdog/sbsa_gwdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index e8bd9887c5663..e221e47396ab7 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -161,7 +161,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd) timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR); timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) - - arch_counter_get_cntvct(); + arch_timer_read_counter(); do_div(timeleft, gwdt->clk); -- GitLab From c93ad1337ad06a718890a89cdd85188ff9a5a5cc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 30 Apr 2019 19:34:08 +0800 Subject: [PATCH 1760/1861] appletalk: Set error code if register_snap_client failed If register_snap_client fails in atalk_init, error code should be set, otherwise it will triggers NULL pointer dereference while unloading module. Fixes: 9804501fa122 ("appletalk: Fix potential NULL pointer dereference in unregister_snap_client") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 709d2542f7295..dbe8b1993be9e 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1920,6 +1920,7 @@ static int __init atalk_init(void) ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) { pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; goto out_sock; } -- GitLab From dea86a80033f8b0fb25a805f46dde9f3b1a7c23a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:03 +0100 Subject: [PATCH 1761/1861] arm64: Use arch_timer_read_counter instead of arch_counter_get_cntvct Only arch_timer_read_counter will guarantee that workarounds are applied. So let's use this one instead of arch_counter_get_cntvct. 
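The distinction matters because arch_timer_read_counter is a function pointer owned by the clocksource driver, roughly (simplified sketch):

    /* default: plain sysreg read */
    u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct;

    /* boot-time errata detection can retarget every caller at once */
    void use_stable_counter(u64 (*safe_read)(void))  /* name assumed */
    {
        arch_timer_read_counter = safe_read;
    }

A direct arch_counter_get_cntvct() call compiles to a raw register read and silently bypasses that indirection, which is why the trap handlers below must go through the pointer.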
Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8ad119c3f665d..6190a60388cf3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -493,7 +493,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = ESR_ELx_SYS64_ISS_RT(esr); - pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); + pt_regs_write_reg(regs, rt, arch_timer_read_counter()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } @@ -665,7 +665,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; - u64 val = arch_counter_get_cntvct(); + u64 val = arch_timer_read_counter(); pt_regs_write_reg(regs, rt, lower_32_bits(val)); pt_regs_write_reg(regs, rt2, upper_32_bits(val)); -- GitLab From 5ef19a161cfa88a59508979e2f39d3d092c1d5c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:04 +0100 Subject: [PATCH 1762/1861] clocksource/arm_arch_timer: Directly assign set_next_event workaround When a given timer is affected by an erratum and requires an alternative implementation of set_next_event, we do a rather complicated dance to detect and call the workaround on each set_next_event call. This is clearly idiotic, as we can perfectly detect whether this CPU requires a workaround while setting up the clock event device. This only requires the CPU-specific detection to be done a bit earlier, and we can then safely override the set_next_event pointer if we have a workaround associated to that CPU. Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/arch_timer.h | 4 +++ arch/arm64/include/asm/arch_timer.h | 16 ++++++++++ drivers/clocksource/arm_arch_timer.c | 46 +++++----------------------- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 0a8d7bba2cb01..3f0a0191f763d 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -11,6 +11,10 @@ #include #ifdef CONFIG_ARM_ARCH_TIMER +/* 32bit ARM doesn't know anything about timer errata... */ +#define has_erratum_handler(h) (false) +#define erratum_handler(h) (arch_timer_##h) + int arch_timer_arch_init(void); /* diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516cf..c3762ffcc9330 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -31,10 +31,26 @@ #include #if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) +#define has_erratum_handler(h) \ + ({ \ + const struct arch_timer_erratum_workaround *__wa; \ + __wa = __this_cpu_read(timer_unstable_counter_workaround); \ + (__wa && __wa->h); \ + }) + +#define erratum_handler(h) \ + ({ \ + const struct arch_timer_erratum_workaround *__wa; \ + __wa = __this_cpu_read(timer_unstable_counter_workaround); \ + (__wa && __wa->h) ?
__wa->h : arch_timer_##h; \ + }) + extern struct static_key_false arch_timer_read_ool_enabled; #define needs_unstable_timer_counter_workaround() \ static_branch_unlikely(&arch_timer_read_ool_enabled) #else +#define has_erratum_handler(h) false +#define erratum_handler(h) (arch_timer_##h) #define needs_unstable_timer_counter_workaround() false #endif diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index da11a9508b778..b2a88a64aab49 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -598,36 +598,12 @@ static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type t local ? "local" : "global", wa->desc); } -#define erratum_handler(fn, r, ...) \ -({ \ - bool __val; \ - if (needs_unstable_timer_counter_workaround()) { \ - const struct arch_timer_erratum_workaround *__wa; \ - __wa = __this_cpu_read(timer_unstable_counter_workaround); \ - if (__wa && __wa->fn) { \ - r = __wa->fn(__VA_ARGS__); \ - __val = true; \ - } else { \ - __val = false; \ - } \ - } else { \ - __val = false; \ - } \ - __val; \ -}) - static bool arch_timer_this_cpu_has_cntvct_wa(void) { - const struct arch_timer_erratum_workaround *wa; - - wa = __this_cpu_read(timer_unstable_counter_workaround); - return wa && wa->read_cntvct_el0; + return has_erratum_handler(read_cntvct_el0); } #else #define arch_timer_check_ool_workaround(t,a) do { } while(0) -#define erratum_set_next_event_tval_virt(...) ({BUG(); 0;}) -#define erratum_set_next_event_tval_phys(...) ({BUG(); 0;}) -#define erratum_handler(fn, r, ...) ({false;}) #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ @@ -721,11 +697,6 @@ static __always_inline void set_next_event(const int access, unsigned long evt, static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { - int ret; - - if (erratum_handler(set_next_event_virt, ret, evt, clk)) - return ret; - set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } @@ -733,11 +704,6 @@ static int arch_timer_set_next_event_virt(unsigned long evt, static int arch_timer_set_next_event_phys(unsigned long evt, struct clock_event_device *clk) { - int ret; - - if (erratum_handler(set_next_event_phys, ret, evt, clk)) - return ret; - set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } @@ -762,6 +728,10 @@ static void __arch_timer_setup(unsigned type, clk->features = CLOCK_EVT_FEAT_ONESHOT; if (type == ARCH_TIMER_TYPE_CP15) { + typeof(clk->set_next_event) sne; + + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); + if (arch_timer_c3stop) clk->features |= CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; @@ -772,20 +742,20 @@ static void __arch_timer_setup(unsigned type, case ARCH_TIMER_VIRT_PPI: clk->set_state_shutdown = arch_timer_shutdown_virt; clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; - clk->set_next_event = arch_timer_set_next_event_virt; + sne = erratum_handler(set_next_event_virt); break; case ARCH_TIMER_PHYS_SECURE_PPI: case ARCH_TIMER_PHYS_NONSECURE_PPI: case ARCH_TIMER_HYP_PPI: clk->set_state_shutdown = arch_timer_shutdown_phys; clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; - clk->set_next_event = arch_timer_set_next_event_phys; + sne = erratum_handler(set_next_event_phys); break; default: BUG(); } - arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); + clk->set_next_event = sne; } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; -- GitLab From 
57f27666f91a85431492b092f5db53ecab1a0739 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:05 +0100 Subject: [PATCH 1763/1861] clocksource/arm_arch_timer: Drop use of static key in arch_timer_reg_read_stable Let's start with the removal of the arch_timer_read_ool_enabled static key in arch_timer_reg_read_stable. It is not a fast path, and we can simplify things a bit. Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 42 +++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index c3762ffcc9330..4a06d46def7ed 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -77,23 +77,37 @@ struct arch_timer_erratum_workaround { DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); +/* inline sysreg accessors that make erratum_handler() work */ +static inline notrace u32 arch_timer_read_cntp_tval_el0(void) +{ + return read_sysreg(cntp_tval_el0); +} + +static inline notrace u32 arch_timer_read_cntv_tval_el0(void) +{ + return read_sysreg(cntv_tval_el0); +} + +static inline notrace u64 arch_timer_read_cntpct_el0(void) +{ + return read_sysreg(cntpct_el0); +} + +static inline notrace u64 arch_timer_read_cntvct_el0(void) +{ + return read_sysreg(cntvct_el0); +} + #define arch_timer_reg_read_stable(reg) \ -({ \ - u64 _val; \ - if (needs_unstable_timer_counter_workaround()) { \ - const struct arch_timer_erratum_workaround *wa; \ + ({ \ + u64 _val; \ + \ preempt_disable_notrace(); \ - wa = __this_cpu_read(timer_unstable_counter_workaround); \ - if (wa && wa->read_##reg) \ - _val = wa->read_##reg(); \ - else \ - _val = read_sysreg(reg); \ + _val = erratum_handler(read_ ## reg)(); \ preempt_enable_notrace(); \ - } else { \ - _val = read_sysreg(reg); \ - } \ - _val; \ -}) + \ + _val; \ + }) /* * These register accessors are marked inline so the compiler can -- GitLab From a862fc2254bdbcee3b5da4f730984e5d8393a2f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:06 +0100 Subject: [PATCH 1764/1861] clocksource/arm_arch_timer: Remove use of workaround static key The use of a static key in a hotplug path has proved to be a real nightmare, and makes it impossible to have scream-free lockdep kernel. Let's remove the static key altogether, and focus on something saner. Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 4 ---- drivers/clocksource/arm_arch_timer.c | 25 +++++++------------------ 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 4a06d46def7ed..5502ea049b63c 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -45,13 +45,9 @@ (__wa && __wa->h) ? 
__wa->h : arch_timer_##h; \ }) -extern struct static_key_false arch_timer_read_ool_enabled; -#define needs_unstable_timer_counter_workaround() \ - static_branch_unlikely(&arch_timer_read_ool_enabled) #else #define has_erratum_handler(h) false #define erratum_handler(h) (arch_timer_##h) -#define needs_unstable_timer_counter_workaround() false #endif enum arch_timer_erratum_match_type { diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index b2a88a64aab49..8f22976247c00 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -365,8 +365,6 @@ static u32 notrace sun50i_a64_read_cntv_tval_el0(void) DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); -DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); -EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, struct clock_event_device *clk) @@ -537,12 +535,6 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa per_cpu(timer_unstable_counter_workaround, i) = wa; } - /* - * Use the locked version, as we're called from the CPU - * hotplug framework. Otherwise, we end-up in deadlock-land. - */ - static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); - /* * Don't use the vdso fastpath if errata require using the * out-of-line counter accessor. We may change our mind pretty @@ -558,7 +550,7 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type, void *arg) { - const struct arch_timer_erratum_workaround *wa; + const struct arch_timer_erratum_workaround *wa, *__wa; ate_match_fn_t match_fn = NULL; bool local = false; @@ -582,16 +574,13 @@ static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type t if (!wa) return; - if (needs_unstable_timer_counter_workaround()) { - const struct arch_timer_erratum_workaround *__wa; - __wa = __this_cpu_read(timer_unstable_counter_workaround); - if (__wa && wa != __wa) - pr_warn("Can't enable workaround for %s (clashes with %s\n)", - wa->desc, __wa->desc); + __wa = __this_cpu_read(timer_unstable_counter_workaround); + if (__wa && wa != __wa) + pr_warn("Can't enable workaround for %s (clashes with %s\n)", + wa->desc, __wa->desc); - if (__wa) - return; - } + if (__wa) + return; arch_timer_enable_workaround(wa, local); pr_info("Enabling %s workaround for %s\n", -- GitLab From 0ea415390cd345b7d09e8c9ebd4b68adfe873043 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:07 +0100 Subject: [PATCH 1765/1861] clocksource/arm_arch_timer: Use arch_timer_read_counter to access stable counters Instead of always going via arch_counter_get_cntvct_stable to access the counter workaround, let's have arch_timer_read_counter point to the right method. For that, we need to track whether any CPU in the system has a workaround for the counter. This is done by having an atomic variable tracking this. 
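Condensed, the mechanism in the hunks below is (simplified sketch; the real flag is called timer_unstable_counter_workaround_in_use):

    static atomic_t counter_wa_in_use = ATOMIC_INIT(0);

    /* erratum-enable path: record that some CPU needs a counter fix */
    if (wa->read_cntvct_el0 || wa->read_cntpct_el0)
        atomic_set(&counter_wa_in_use, 1);

    /* registration: choose the reader once, not on every read */
    arch_timer_read_counter = atomic_read(&counter_wa_in_use)
                  ? arch_counter_get_cntvct_stable  /* workaround path */
                  : arch_counter_get_cntvct;        /* fast path */

The per-read branch disappears; the check is paid once at boot.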
Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/arch_timer.h | 14 ++++++-- arch/arm64/include/asm/arch_timer.h | 16 ++++++++-- drivers/clocksource/arm_arch_timer.c | 48 +++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 3f0a0191f763d..4b66ecd6be99d 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -83,7 +83,7 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) +static inline u64 __arch_counter_get_cntpct(void) { u64 cval; @@ -92,7 +92,12 @@ static inline u64 arch_counter_get_cntpct(void) return cval; } -static inline u64 arch_counter_get_cntvct(void) +static inline u64 __arch_counter_get_cntpct_stable(void) +{ + return __arch_counter_get_cntpct(); +} + +static inline u64 __arch_counter_get_cntvct(void) { u64 cval; @@ -101,6 +106,11 @@ static inline u64 arch_counter_get_cntvct(void) return cval; } +static inline u64 __arch_counter_get_cntvct_stable(void) +{ + return __arch_counter_get_cntvct(); +} + static inline u32 arch_timer_get_cntkctl(void) { u32 cntkctl; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 5502ea049b63c..48b2100f4aaad 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -174,18 +174,30 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -static inline u64 arch_counter_get_cntpct(void) +static inline u64 __arch_counter_get_cntpct_stable(void) { isb(); return arch_timer_reg_read_stable(cntpct_el0); } -static inline u64 arch_counter_get_cntvct(void) +static inline u64 __arch_counter_get_cntpct(void) +{ + isb(); + return read_sysreg(cntpct_el0); +} + +static inline u64 __arch_counter_get_cntvct_stable(void) { isb(); return arch_timer_reg_read_stable(cntvct_el0); } +static inline u64 __arch_counter_get_cntvct(void) +{ + isb(); + return read_sysreg(cntvct_el0); +} + static inline int arch_timer_arch_init(void) { return 0; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 8f22976247c00..27acc9eb0f7c4 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -152,6 +152,26 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, return val; } +static u64 arch_counter_get_cntpct_stable(void) +{ + return __arch_counter_get_cntpct_stable(); +} + +static u64 arch_counter_get_cntpct(void) +{ + return __arch_counter_get_cntpct(); +} + +static u64 arch_counter_get_cntvct_stable(void) +{ + return __arch_counter_get_cntvct_stable(); +} + +static u64 arch_counter_get_cntvct(void) +{ + return __arch_counter_get_cntvct(); +} + /* * Default to cp15 based access because arm64 uses this function for * sched_clock() before DT is probed and the cp15 method is guaranteed @@ -365,6 +385,7 @@ static u32 notrace sun50i_a64_read_cntv_tval_el0(void) DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); +static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0); static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, struct clock_event_device *clk) @@ -535,6 +556,9 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa per_cpu(timer_unstable_counter_workaround, i) = wa; } + if 
(wa->read_cntvct_el0 || wa->read_cntpct_el0) + atomic_set(&timer_unstable_counter_workaround_in_use, 1); + /* * Don't use the vdso fastpath if errata require using the * out-of-line counter accessor. We may change our mind pretty @@ -591,9 +615,15 @@ static bool arch_timer_this_cpu_has_cntvct_wa(void) { return has_erratum_handler(read_cntvct_el0); } + +static bool arch_timer_counter_has_wa(void) +{ + return atomic_read(&timer_unstable_counter_workaround_in_use); +} #else #define arch_timer_check_ool_workaround(t,a) do { } while(0) #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) +#define arch_timer_counter_has_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ static __always_inline irqreturn_t timer_handler(const int access, @@ -942,12 +972,22 @@ static void __init arch_counter_register(unsigned type) /* Register the CP15 based counter if we have one */ if (type & ARCH_TIMER_TYPE_CP15) { + u64 (*rd)(void); + if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || - arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) - arch_timer_read_counter = arch_counter_get_cntvct; - else - arch_timer_read_counter = arch_counter_get_cntpct; + arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { + if (arch_timer_counter_has_wa()) + rd = arch_counter_get_cntvct_stable; + else + rd = arch_counter_get_cntvct; + } else { + if (arch_timer_counter_has_wa()) + rd = arch_counter_get_cntpct_stable; + else + rd = arch_counter_get_cntpct; + } + arch_timer_read_counter = rd; clocksource_counter.archdata.vdso_direct = vdso_default; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; -- GitLab From a622b40035d16196bf19b2b33b854862595245fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Apr 2019 06:27:58 -0700 Subject: [PATCH 1766/1861] l2tp: fix possible use-after-free Before taking a refcount on a rcu protected structure, we need to make sure the refcount is not zero. syzbot reported : refcount_t: increment on 0; use-after-free. WARNING: CPU: 1 PID: 23533 at lib/refcount.c:156 refcount_inc_checked lib/refcount.c:156 [inline] WARNING: CPU: 1 PID: 23533 at lib/refcount.c:156 refcount_inc_checked+0x61/0x70 lib/refcount.c:154 Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 23533 Comm: syz-executor.2 Not tainted 5.1.0-rc7+ #93 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x45 kernel/panic.c:571 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:refcount_inc_checked lib/refcount.c:156 [inline] RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:154 Code: 1d 98 2b 2a 06 31 ff 89 de e8 db 2c 40 fe 84 db 75 dd e8 92 2b 40 fe 48 c7 c7 20 7a a1 87 c6 05 78 2b 2a 06 01 e8 7d d9 12 fe <0f> 0b eb c1 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 RSP: 0018:ffff888069f0fba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 000000000000f353 RSI: ffffffff815afcb6 RDI: ffffed100d3e1f67 RBP: ffff888069f0fbb8 R08: ffff88809b1845c0 R09: ffffed1015d23ef1 R10: ffffed1015d23ef0 R11: ffff8880ae91f787 R12: ffff8880a8f26968 R13: 0000000000000004 R14: dffffc0000000000 R15: ffff8880a49a6440 l2tp_tunnel_inc_refcount net/l2tp/l2tp_core.h:240 [inline] l2tp_tunnel_get+0x250/0x580 net/l2tp/l2tp_core.c:173 pppol2tp_connect+0xc00/0x1c70 net/l2tp/l2tp_ppp.c:702 __sys_connect+0x266/0x330 net/socket.c:1808 __do_sys_connect net/socket.c:1819 [inline] __se_sys_connect net/socket.c:1816 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1816 Fixes: 54652eb12c1b ("l2tp: hold tunnel while looking up sessions in l2tp_netlink") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index aee33d1320184..52b5a2797c0c6 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - l2tp_tunnel_inc_refcount(tunnel); + if (tunnel->tunnel_id == tunnel_id && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - l2tp_tunnel_inc_refcount(tunnel); + if (++count > nth && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } -- GitLab From 8449eedaa1da6a51d67190c905b1b54243e095f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Sat, 27 Apr 2019 20:34:19 +0200 Subject: [PATCH 1767/1861] io_uring: fix handling SQEs requesting NOWAIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all request types set REQ_F_FORCE_NONBLOCK when they needed async punting; reverse logic instead and set REQ_F_NOWAIT if request mustn't be punted. Signed-off-by: Stefan Bühler Merged with my previous patch for this. 
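From the application side, the effect is that RWF_NOWAIT now reliably means 'fail fast'. A hypothetical liburing-style snippet (helper names assumed, not taken from this patch; error handling elided):

    struct io_uring ring;
    struct io_uring_cqe *cqe;
    struct iovec iov = { .iov_base = buf, .iov_len = len };

    io_uring_queue_init(8, &ring, 0);

    struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
    io_uring_prep_readv(sqe, fd, &iov, 1, 0);
    sqe->rw_flags = RWF_NOWAIT;   /* must not block, must not be punted */
    io_uring_submit(&ring);

    io_uring_wait_cqe(&ring, &cqe);
    if (cqe->res == -EAGAIN)
        ;   /* data not ready: retry later, nothing ever blocked */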
Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0e9fb2cb1984b..d5e23a6dd6aa2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -221,7 +221,7 @@ struct io_kiocb { struct list_head list; unsigned int flags; refcount_t refs; -#define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */ +#define REQ_F_NOWAIT 1 /* must not punt to workers */ #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ @@ -774,10 +774,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); if (unlikely(ret)) return ret; - if (force_nonblock) { + + /* don't allow async punt if RWF_NOWAIT was requested */ + if (kiocb->ki_flags & IOCB_NOWAIT) + req->flags |= REQ_F_NOWAIT; + + if (force_nonblock) kiocb->ki_flags |= IOCB_NOWAIT; - req->flags |= REQ_F_FORCE_NONBLOCK; - } + if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !kiocb->ki_filp->f_op->iopoll) @@ -1436,8 +1440,7 @@ restart: struct sqe_submit *s = &req->submit; const struct io_uring_sqe *sqe = s->sqe; - /* Ensure we clear previously set forced non-block flag */ - req->flags &= ~REQ_F_FORCE_NONBLOCK; + /* Ensure we clear previously set non-block flag */ req->rw.ki_flags &= ~IOCB_NOWAIT; ret = 0; @@ -1623,7 +1626,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, goto out; ret = __io_submit_sqe(ctx, req, s, true); - if (ret == -EAGAIN) { + if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL); -- GitLab From 1e84b97b7377bd0198f87b49ad3e396e84bf0458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:16 +0200 Subject: [PATCH 1768/1861] io_uring: fix notes on barriers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The application reading the CQ ring needs a barrier to pair with the smp_store_release in io_commit_cqring, not the barrier after it. Also a write barrier *after* writing something (but not *before* writing anything interesting) doesn't order anything, so an smp_wmb() after writing SQ tail is not needed. Additionally consider reading SQ head and writing CQ tail in the notes. Also add some clarifications how the various other fields in the ring buffers are used. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 110 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5e23a6dd6aa2..7ab93e854eb21 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4,15 +4,28 @@ * supporting fast/efficient IO. * * A note on the read/write ordering memory barriers that are matched between - * the application and kernel side. When the application reads the CQ ring - * tail, it must use an appropriate smp_rmb() to order with the smp_wmb() - * the kernel uses after writing the tail. Failure to do so could cause a - * delay in when the application notices that completion events available. - * This isn't a fatal condition. Likewise, the application must use an - * appropriate smp_wmb() both before writing the SQ tail, and after writing - * the SQ tail. 
The first one orders the sqe writes with the tail write, and - * the latter is paired with the smp_rmb() the kernel will issue before - * reading the SQ tail on submission. + * the application and kernel side. + * + * After the application reads the CQ ring tail, it must use an + * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses + * before writing the tail (using smp_load_acquire to read the tail will + * do). It also needs a smp_mb() before updating CQ head (ordering the + * entry load(s) with the head store), pairing with an implicit barrier + * through a control-dependency in io_get_cqring (smp_store_release to + * store head will do). Failure to do so could lead to reading invalid + * CQ entries. + * + * Likewise, the application must use an appropriate smp_wmb() before + * writing the SQ tail (ordering SQ entry stores with the tail store), + * which pairs with smp_load_acquire in io_get_sqring (smp_store_release + * to store the tail will do). And it needs a barrier ordering the SQ + * head load before writing new SQ entries (smp_load_acquire to read + * head will do). + * + * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application + * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* + * updating the SQ tail; a full memory barrier smp_mb() is needed + * between. * * Also see the examples in the liburing library: * @@ -70,20 +83,108 @@ struct io_uring { u32 tail ____cacheline_aligned_in_smp; }; +/* + * This data is shared with the application through the mmap at offset + * IORING_OFF_SQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ struct io_sq_ring { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head and the application controls tail. + */ struct io_uring r; + /* + * Bitmask to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ u32 ring_mask; + /* Ring size (constant, power of 2) */ u32 ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ u32 dropped; + /* + * Runtime flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ u32 flags; + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. + */ u32 array[]; }; +/* + * This data is shared with the application through the mmap at offset + * IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_cqring_offsets when calling io_uring_setup. + */ struct io_cq_ring { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The application controls head and the kernel tail. 
+ */ struct io_uring r; + /* + * Bitmask to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ u32 ring_mask; + /* Ring size (constant, power of 2) */ u32 ring_entries; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending thatn there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ u32 overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ struct io_uring_cqe cqes[]; }; -- GitLab From 4f7067c3fb7f2974363a28c597a41949d971af02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:17 +0200 Subject: [PATCH 1769/1861] io_uring: remove unnecessary barrier before wq_has_sleeper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wq_has_sleeper has a full barrier internally. The smp_rmb barrier in io_uring_poll synchronizes with it. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7ab93e854eb21..bb71b7f00bb38 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -418,12 +418,6 @@ static void io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&ring->r.tail, ctx->cached_cq_tail); - /* - * Write sider barrier of tail update, app has read side. See - * comment at the top of this file. - */ - smp_wmb(); - if (wq_has_sleeper(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); @@ -2677,7 +2671,10 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) __poll_t mask = 0; poll_wait(file, &ctx->cq_wait, wait); - /* See comment at the top of this file */ + /* + * synchronizes with barrier from wq_has_sleeper call in + * io_commit_cqring + */ smp_rmb(); if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head != ctx->sq_ring->ring_entries) -- GitLab From 115e12e58dbc055e98c965e3255aed7b20214f95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:18 +0200 Subject: [PATCH 1770/1861] io_uring: remove unnecessary barrier before reading cq head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory operations before reading cq head are unrelated and we don't care about their order. Document that the control dependency in combination with READ_ONCE and WRITE_ONCE forms a barrier we need. 
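The shape of the argument, stripped down (simplified from io_get_cqring below):

    static struct io_uring_cqe *get_cqe(struct io_cq_ring *ring, unsigned tail)
    {
        if (tail - READ_ONCE(ring->r.head) == ring->ring_entries)
            return NULL;    /* full: no store happens at all */

        /*
         * Every WRITE_ONCE() into the returned entry executes only on
         * this branch, so the stores are control-dependent on the
         * READ_ONCE() of head and cannot be reordered before it.
         */
        return &ring->cqes[tail & ring->ring_mask];
    }

No explicit barrier is needed because CPUs do not make stores visible speculatively.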
Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bb71b7f00bb38..3671a654a1469 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -431,8 +431,11 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) unsigned tail; tail = ctx->cached_cq_tail; - /* See comment at the top of the file */ - smp_rmb(); + /* + * writes to the cq entry need to come after reading head; the + * control dependency is enough as we're using WRITE_ONCE to + * fill the cq entry + */ if (tail - READ_ONCE(ring->r.head) == ring->ring_entries) return NULL; -- GitLab From 9e4c15a3939448d2ea9b9bf59561183bbe3fdc49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:19 +0200 Subject: [PATCH 1771/1861] io_uring: remove unnecessary barrier after updating SQ head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no operation afterwards to order with. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 3671a654a1469..d3c57ee233fe7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1798,12 +1798,6 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) * write new data to them. */ smp_store_release(&ring->r.head, ctx->cached_sq_head); - - /* - * write side barrier of head update, app has read side. See - * comment at the top of this file - */ - smp_wmb(); } } -- GitLab From 82ab082c0e2f8592c2ff6b2ab99a92d8406c8c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:20 +0200 Subject: [PATCH 1772/1861] io_uring: remove unnecessary barrier before reading SQ tail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no operation before to order with. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d3c57ee233fe7..662f1c070c8cc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1831,8 +1831,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) * though the application is the one updating it. */ head = ctx->cached_sq_head; - /* See comment at the top of this file */ - smp_rmb(); /* make sure SQ entry isn't read before tail */ if (head == smp_load_acquire(&ring->r.tail)) return false; -- GitLab From b841f19524a16cd93a39f9306191f85c549a2bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:21 +0200 Subject: [PATCH 1773/1861] io_uring: remove unnecessary barrier after incrementing dropped counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smp_store_release in io_commit_sqring already orders the store to dropped before the update to SQ head. 
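The pairing, spelled out as a sketch:

    /* kernel (producer) side */
    ring->dropped++;                            /* plain store */
    smp_store_release(&ring->r.head, new_head); /* orders all prior stores */

    /* application (consumer) side */
    head = smp_load_acquire(&ring->r.head);     /* pairs with the release */
    dropped = ring->dropped;  /* at least as new as when head was published */

An smp_wmb() between the two producer stores therefore adds nothing.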
Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 662f1c070c8cc..2ebc33cc907bf 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1846,8 +1846,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s) /* drop invalid entries */ ctx->cached_sq_head++; ring->dropped++; - /* See comment at the top of this file */ - smp_wmb(); return false; } -- GitLab From 62977281a6384d3904c02272a638cc3ac3bac54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 24 Apr 2019 23:54:22 +0200 Subject: [PATCH 1774/1861] io_uring: remove unnecessary barrier after unsetting IORING_SQ_NEED_WAKEUP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no operation to order with afterwards, and removing the flag is not critical in any way. There will always be a "race condition" where the application will trigger IORING_ENTER_SQ_WAKEUP when it isn't actually needed. Signed-off-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2ebc33cc907bf..77b247b5d10b8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1969,13 +1969,11 @@ static int io_sq_thread(void *data) finish_wait(&ctx->sqo_wait, &wait); ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP; - smp_wmb(); continue; } finish_wait(&ctx->sqo_wait, &wait); ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP; - smp_wmb(); } i = 0; -- GitLab From b51ce3744f115850166f3d6c292b9c8cb849ad4f Mon Sep 17 00:00:00 2001 From: Gary Hook Date: Mon, 29 Apr 2019 22:22:58 +0000 Subject: [PATCH 1775/1861] x86/mm/mem_encrypt: Disable all instrumentation for early SME setup Enablement of AMD's Secure Memory Encryption feature is determined very early after start_kernel() is entered. Part of this procedure involves scanning the command line for the parameter 'mem_encrypt'. To determine intended state, the function sme_enable() uses library functions cmdline_find_option() and strncmp(). Their use occurs early enough such that it cannot be assumed that any instrumentation subsystem is initialized. For example, making calls to a KASAN-instrumented function before KASAN is set up will result in the use of uninitialized memory and a boot failure. When AMD's SME support is enabled, conditionally disable instrumentation of these dependent functions in lib/string.c and arch/x86/lib/cmdline.c. [ bp: Get rid of intermediary nostackp var and cleanup whitespace. ] Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Reported-by: Li RongQing Signed-off-by: Gary R Hook Signed-off-by: Borislav Petkov Cc: Alexander Shishkin Cc: Andrew Morton Cc: Andy Shevchenko Cc: Boris Brezillon Cc: Coly Li Cc: "dave.hansen@linux.intel.com" Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Kees Cook Cc: Kent Overstreet Cc: "luto@kernel.org" Cc: Masahiro Yamada Cc: Matthew Wilcox Cc: "mingo@redhat.com" Cc: "peterz@infradead.org" Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/155657657552.7116.18363762932464011367.stgit@sosrh3.amd.com --- arch/x86/lib/Makefile | 12 ++++++++++++ lib/Makefile | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 140e61843a079..3cb3af51ec897 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -6,6 +6,18 @@ # Produces uninteresting flaky coverage. 
KCOV_INSTRUMENT_delay.o := n +# Early boot use of cmdline; don't instrument it +ifdef CONFIG_AMD_MEM_ENCRYPT +KCOV_INSTRUMENT_cmdline.o := n +KASAN_SANITIZE_cmdline.o := n + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_cmdline.o = -pg +endif + +CFLAGS_cmdline.o := $(call cc-option, -fno-stack-protector) +endif + inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ diff --git a/lib/Makefile b/lib/Makefile index 3b08673e8881a..18c2be516ab48 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -17,6 +17,17 @@ KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n +# Early boot use of cmdline, don't instrument it +ifdef CONFIG_AMD_MEM_ENCRYPT +KASAN_SANITIZE_string.o := n + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_string.o = -pg +endif + +CFLAGS_string.o := $(call cc-option, -fno-stack-protector) +endif + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ -- GitLab From 2c2a2fb1e2a9256714338875bede6b7cbd4b9542 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 30 Apr 2019 11:18:21 +0200 Subject: [PATCH 1776/1861] Revert "ACPICA: Clear status of GPEs before enabling them" Revert commit c8b1917c8987 ("ACPICA: Clear status of GPEs before enabling them") that causes problems with Thunderbolt controllers to occur if a dock device is connected at init time (the xhci_hcd and thunderbolt modules crash which prevents peripherals connected through them from working). Commit c8b1917c8987 effectively causes commit ecc1165b8b74 ("ACPICA: Dispatch active GPEs at init time") to get undone, so the problem addressed by commit ecc1165b8b74 appears again as a result of it. Fixes: c8b1917c8987 ("ACPICA: Clear status of GPEs before enabling them") Link: https://lore.kernel.org/lkml/s5hy33siofw.wl-tiwai@suse.de/T/#u Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1132943 Reported-by: Michael Hirmke Reported-by: Takashi Iwai Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/evgpe.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index 5e9d7348c16f7..62d3aa74277b4 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -81,12 +81,8 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) ACPI_FUNCTION_TRACE(ev_enable_gpe); - /* Clear the GPE status */ - status = acpi_hw_clear_gpe(gpe_event_info); - if (ACPI_FAILURE(status)) - return_ACPI_STATUS(status); - /* Enable the requested GPE */ + status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE); return_ACPI_STATUS(status); } -- GitLab From 8764ed55c9705e426d889ff16c26f398bba70b9b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 29 Apr 2019 07:04:15 -0700 Subject: [PATCH 1777/1861] KVM: x86: Whitelist port 0x7e for pre-incrementing %rip KVM's recent bug fix to update %rip after emulating I/O broke userspace that relied on the previous behavior of incrementing %rip prior to exiting to userspace. When running a Windows XP guest on AMD hardware, Qemu may patch "OUT 0x7E" instructions in reaction to the OUT itself. Because KVM's old behavior was to increment %rip before exiting to userspace to handle the I/O, Qemu manually adjusted %rip to account for the OUT instruction. 
Arguably this is a userspace bug as KVM requires userspace to re-enter the kernel to complete instruction emulation before taking any other actions. That being said, this is a bit of a grey area and breaking userspace that has worked for many years is bad. Pre-increment %rip on OUT to port 0x7e before exiting to userspace to hack around the issue. Fixes: 45def77ebf79e ("KVM: x86: update %rip after emulating IO") Reported-by: Simon Becherer Reported-and-tested-by: Iakov Karpov Reported-by: Gabriele Balducci Reported-by: Antti Antinoja Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Jiri Slaby Cc: Greg Kroah-Hartman Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/x86.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index dabfcf7c3941a..7a0e64ccd6ff5 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -381,6 +381,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9482cb36b92ac..57621313deadc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6539,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pio.count = 0; + return 1; +} + static int complete_fast_pio_out(struct kvm_vcpu *vcpu) { vcpu->arch.pio.count = 0; @@ -6555,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, size, port, &val, 1); + if (ret) + return ret; - if (!ret) { + /* + * Workaround userspace that relies on old KVM behavior of %rip being + * incremented prior to exiting to userspace to handle "OUT 0x7e". + */ + if (port == 0x7e && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) { + vcpu->arch.complete_userspace_io = + complete_fast_pio_out_port_0x7e; + kvm_skip_emulated_instruction(vcpu); + } else { vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = complete_fast_pio_out; } - return ret; + return 0; } static int complete_fast_pio_in(struct kvm_vcpu *vcpu) -- GitLab From 0699c64a4be6e4a6137240379a1f82c752e663d8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 30 Apr 2019 19:33:26 +0200 Subject: [PATCH 1778/1861] x86/kvm/mmu: reset MMU context when 32-bit guest switches PAE Commit 47c42e6b4192 ("KVM: x86: fix handling of role.cr4_pae and rename it to 'gpte_size'") introduced a regression: 32-bit PAE guests stopped working. The issue appears to be: when guest switches (enables) PAE we need to re-initialize MMU context (set context->root_level, do reset_rsvds_bits_mask(), ...) but init_kvm_tdp_mmu() doesn't do that because we threw away is_pae(vcpu) flag from mmu role. Restore it to kvm_mmu_extended_role (as we now don't need it in base role) to fix the issue. 
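The reason a missing role bit keeps the stale context, sketched (simplified; the actual check lives in init_kvm_tdp_mmu and friends, and the helper name here is assumed):

    union kvm_mmu_role new_role = calc_mmu_role(vcpu);  /* includes ext bits */

    /* the MMU context is only rebuilt when the computed role changes */
    if (new_role.as_u64 == context->mmu_role.as_u64)
        return;  /* PAE toggle invisible => root_level, rsvd masks kept stale */

With cr4_pae folded into the extended role, enabling PAE changes new_role and forces the re-initialization.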
Fixes: 47c42e6b4192 ("KVM: x86: fix handling of role.cr4_pae and rename it to 'gpte_size'") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a9d03af340307..c79abe7ca093c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -295,6 +295,7 @@ union kvm_mmu_extended_role { unsigned int valid:1; unsigned int execonly:1; unsigned int cr0_pg:1; + unsigned int cr4_pae:1; unsigned int cr4_pse:1; unsigned int cr4_pke:1; unsigned int cr4_smap:1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e10962dfc2032..d9c7b45d231f1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4781,6 +4781,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) union kvm_mmu_extended_role ext = {0}; ext.cr0_pg = !!is_paging(vcpu); + ext.cr4_pae = !!is_pae(vcpu); ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); ext.cr4_pse = !!is_pse(vcpu); -- GitLab From 76d58e0f07ec203bbdfcaabd9a9fc10a5a3ed5ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Apr 2019 15:28:44 +0200 Subject: [PATCH 1779/1861] KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size If a memory slot's size is not a multiple of 64 pages (256K), then the KVM_CLEAR_DIRTY_LOG API is unusable: clearing the final 64 pages either requires the requested page range to go beyond memslot->npages, or requires log->num_pages to be unaligned, and kvm_clear_dirty_log_protect requires log->num_pages to be both in range and aligned. To allow this case, allow log->num_pages not to be a multiple of 64 if it ends exactly on the last page of the slot. Reported-by: Peter Xu Fixes: 98938aa8edd6 ("KVM: validate userspace input in kvm_clear_dirty_log_protect()", 2019-01-02) Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 +++-- tools/testing/selftests/kvm/dirty_log_test.c | 9 ++++++--- virt/kvm/kvm_main.c | 7 ++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7f3ebc9e7cee8..64b38dfcc2439 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3830,8 +3830,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap field. Bit 0 of the bitmap corresponds to page "first_page" in the memory slot, and num_pages is the size in bits of the input bitmap. -Both first_page and num_pages must be a multiple of 64. For each bit -that is set in the input bitmap, the corresponding page is marked "clean" +first_page must be a multiple of 64; num_pages must also be a multiple of +64 unless first_page + num_pages is the size of the memory slot. For each +bit that is set in the input bitmap, the corresponding page is marked "clean" in KVM's dirty bitmap, and dirty tracking is re-enabled for that page (for example via write-protection, or by clearing the dirty bit in a page table entry). 
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 4715cfba20dce..93f99c6b7d79e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -288,8 +288,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, #endif max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; guest_page_size = (1ul << guest_page_shift); - /* 1G of guest page sized pages */ - guest_num_pages = (1ul << (30 - guest_page_shift)); + /* + * A little more than 1G of guest page sized pages. Cover the + * case where the size is not aligned to 64 pages. + */ + guest_num_pages = (1ul << (30 - guest_page_shift)) + 3; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -359,7 +362,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); #ifdef USE_CLEAR_DIRTY_LOG kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - DIV_ROUND_UP(host_num_pages, 64) * 64); + host_num_pages); #endif vm_dirty_log_verify(bmap); iteration++; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc8edc97ba850..a704d1f9bd962 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; - if ((log->first_page & 63) || (log->num_pages & 63)) + if (log->first_page & 63) return -EINVAL; slots = __kvm_memslots(kvm, as_id); @@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, n = kvm_dirty_bitmap_bytes(memslot); if (log->first_page > memslot->npages || - log->num_pages > memslot->npages - log->first_page) - return -EINVAL; + log->num_pages > memslot->npages - log->first_page || + (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) + return -EINVAL; *flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); -- GitLab From b904cb8dff824b79233e82c078837627ebd52717 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 25 Apr 2019 19:01:09 -0700 Subject: [PATCH 1780/1861] KVM: lapic: Check for in-kernel LAPIC before deferencing apic pointer ...to avoid dereferencing a null pointer when querying the per-vCPU timer advance. 
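The shape of the fix is the familiar hoist-the-NULL-check pattern. Reduced to a standalone sketch with simplified, made-up types:

#include <stddef.h>
#include <stdio.h>

struct apic { unsigned long timer_advance_ns; };
struct vcpu { struct apic *apic; };

static int lapic_in_kernel(const struct vcpu *v)
{
	return v->apic != NULL;
}

static void wait_expire(struct vcpu *v)
{
	/* precondition: v->apic is non-NULL, guaranteed by the caller */
	printf("advance=%lu\n", v->apic->timer_advance_ns);
}

static void enter_guest(struct vcpu *v)
{
	if (lapic_in_kernel(v) && v->apic->timer_advance_ns)
		wait_expire(v);
}

int main(void)
{
	struct vcpu no_lapic = { .apic = NULL };
	struct apic a = { .timer_advance_ns = 1000 };
	struct vcpu with_lapic = { .apic = &a };

	enter_guest(&no_lapic);		/* safely skipped */
	enter_guest(&with_lapic);	/* guarded dereference */
	return 0;
}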
Fixes: 39497d7660d98 ("KVM: lapic: Track lapic timer advance per vCPU") Reported-by: syzbot+f7e65445a40d3e0e4ebf@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 --- arch/x86/kvm/x86.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e0fa6fc0b2d89..bd13fdddbdc4a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1507,9 +1507,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; u64 guest_tsc, tsc_deadline, ns; - if (!lapic_in_kernel(vcpu)) - return; - if (apic->lapic_timer.expired_tscdeadline == 0) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 57621313deadc..b5edc8e3ce1df 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7894,7 +7894,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - if (vcpu->arch.apic->lapic_timer.timer_advance_ns) + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.timer_advance_ns) wait_lapic_expire(vcpu); guest_enter_irqoff(); -- GitLab From eba3afde1cea7dbd7881683232f2a85e2ed86bfe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 26 Apr 2019 15:27:11 +0200 Subject: [PATCH 1781/1861] KVM: selftests: make hyperv_cpuid test pass on AMD Enlightened VMCS is only supported on Intel CPUs but the test shouldn't fail completely. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 264425f75806b..9a21e912097c4 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -141,7 +141,13 @@ int main(int argc, char *argv[]) free(hv_cpuid_entries); - vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + + if (rv) { + fprintf(stderr, + "Enlightened VMCS is unsupported, skip related test\n"); + goto vm_free; + } hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); if (!hv_cpuid_entries) @@ -151,6 +157,7 @@ int main(int argc, char *argv[]) free(hv_cpuid_entries); +vm_free: kvm_vm_free(vm); return 0; -- GitLab From e8ab8d24b488632d07ce5ddb261f1d454114415b Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 17 Jan 2019 11:55:58 -0800 Subject: [PATCH 1782/1861] KVM: nVMX: Fix size checks in vmx_set_nested_state The size checks in vmx_nested_state are wrong because the calculations are made based on the size of a pointer to a struct kvm_nested_state rather than the size of a struct kvm_nested_state. 
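The pitfall generalizes well beyond this code. A standalone C demonstration of why sizeof(kvm_state) and sizeof(*kvm_state) differ (struct size chosen arbitrarily):

#include <stdio.h>

struct nested_state_like {
	char pad[128];
};

int main(void)
{
	struct nested_state_like *s = NULL;

	printf("sizeof(s)  = %zu\n", sizeof(s));	/* 8 on LP64: the pointer */
	printf("sizeof(*s) = %zu\n", sizeof(*s));	/* 128: the struct */
	return 0;
}

With the pointer size used as the floor, buffers far too small to hold the real nested state passed the check.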
Reported-by: Felix Wilhelm Signed-off-by: Jim Mattson Reviewed-by: Drew Schmitt Reviewed-by: Marc Orr Reviewed-by: Peter Shier Reviewed-by: Krish Sadhukhan Fixes: 8fcc4b5923af5de58b80b53a069453b135693304 Cc: stable@ver.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6401eb7ef19ce..0c601d079cd20 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5423,7 +5423,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return ret; /* Empty 'VMXON' state is permitted */ - if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) return 0; if (kvm_state->vmx.vmcs_pa != -1ull) { @@ -5467,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12)) + if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) return -EINVAL; if (copy_from_user(shadow_vmcs12, -- GitLab From 0e2338749192ce0e52e7174c5352f627632f478a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Apr 2019 12:22:25 -0700 Subject: [PATCH 1783/1861] ipv6: fix races in ip6_dst_destroy() We had many syzbot reports that seem to be caused by use-after-free of struct fib6_info. ip6_dst_destroy(), fib6_drop_pcpu_from() and rt6_remove_exception() are writers vs rt->from, and use non consistent synchronization among themselves. Switching to xchg() will solve the issues with no possible lockdep issues. BUG: KASAN: user-memory-access in atomic_dec_and_test include/asm-generic/atomic-instrumented.h:747 [inline] BUG: KASAN: user-memory-access in fib6_info_release include/net/ip6_fib.h:294 [inline] BUG: KASAN: user-memory-access in fib6_info_release include/net/ip6_fib.h:292 [inline] BUG: KASAN: user-memory-access in fib6_drop_pcpu_from net/ipv6/ip6_fib.c:927 [inline] BUG: KASAN: user-memory-access in fib6_purge_rt+0x4f6/0x670 net/ipv6/ip6_fib.c:960 Write of size 4 at addr 0000000000ffffb4 by task syz-executor.1/7649 CPU: 0 PID: 7649 Comm: syz-executor.1 Not tainted 5.1.0-rc6+ #183 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 kasan_report.cold+0x5/0x40 mm/kasan/report.c:321 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 kasan_check_write+0x14/0x20 mm/kasan/common.c:108 atomic_dec_and_test include/asm-generic/atomic-instrumented.h:747 [inline] fib6_info_release include/net/ip6_fib.h:294 [inline] fib6_info_release include/net/ip6_fib.h:292 [inline] fib6_drop_pcpu_from net/ipv6/ip6_fib.c:927 [inline] fib6_purge_rt+0x4f6/0x670 net/ipv6/ip6_fib.c:960 fib6_del_route net/ipv6/ip6_fib.c:1813 [inline] fib6_del+0xac2/0x10a0 net/ipv6/ip6_fib.c:1844 fib6_clean_node+0x3a8/0x590 net/ipv6/ip6_fib.c:2006 fib6_walk_continue+0x495/0x900 net/ipv6/ip6_fib.c:1928 fib6_walk+0x9d/0x100 net/ipv6/ip6_fib.c:1976 fib6_clean_tree+0xe0/0x120 net/ipv6/ip6_fib.c:2055 __fib6_clean_all+0x118/0x2a0 net/ipv6/ip6_fib.c:2071 fib6_clean_all+0x2b/0x40 net/ipv6/ip6_fib.c:2082 rt6_sync_down_dev+0x134/0x150 net/ipv6/route.c:4057 rt6_disable_ip+0x27/0x5f0 net/ipv6/route.c:4062 addrconf_ifdown+0xa2/0x1220 net/ipv6/addrconf.c:3705 addrconf_notify+0x19a/0x2260 
net/ipv6/addrconf.c:3630 notifier_call_chain+0xc7/0x240 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2e/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1753 call_netdevice_notifiers_extack net/core/dev.c:1765 [inline] call_netdevice_notifiers net/core/dev.c:1779 [inline] dev_close_many+0x33f/0x6f0 net/core/dev.c:1522 rollback_registered_many+0x43b/0xfd0 net/core/dev.c:8177 rollback_registered+0x109/0x1d0 net/core/dev.c:8242 unregister_netdevice_queue net/core/dev.c:9289 [inline] unregister_netdevice_queue+0x1ee/0x2c0 net/core/dev.c:9282 unregister_netdevice include/linux/netdevice.h:2658 [inline] __tun_detach+0xd5b/0x1000 drivers/net/tun.c:727 tun_detach drivers/net/tun.c:744 [inline] tun_chr_close+0xe0/0x180 drivers/net/tun.c:3443 __fput+0x2e5/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x90a/0x2fa0 kernel/exit.c:876 do_group_exit+0x135/0x370 kernel/exit.c:980 __do_sys_exit_group kernel/exit.c:991 [inline] __se_sys_exit_group kernel/exit.c:989 [inline] __x64_sys_exit_group+0x44/0x50 kernel/exit.c:989 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458da9 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffeafc2a6a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 000000000000001c RCX: 0000000000458da9 RDX: 0000000000412a80 RSI: 0000000000a54ef0 RDI: 0000000000000043 RBP: 00000000004be552 R08: 000000000000000c R09: 000000000004c0d1 R10: 0000000002341940 R11: 0000000000000246 R12: 00000000ffffffff R13: 00007ffeafc2a7f0 R14: 000000000004c065 R15: 00007ffeafc2a800 Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: David Ahern Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Acked-by: Wei Wang Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 4 +--- net/ipv6/route.c | 9 ++------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6613d8dbb0e5a..91247a6fc67ff 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -921,9 +921,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, if (pcpu_rt) { struct fib6_info *from; - from = rcu_dereference_protected(pcpu_rt->from, - lockdep_is_held(&table->tb6_lock)); - rcu_assign_pointer(pcpu_rt->from, NULL); + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b4899f0de0d06..2cc166bc978d9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -379,11 +379,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - rcu_read_lock(); - from = rcu_dereference(rt->from); - rcu_assign_pointer(rt->from, NULL); + from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); - rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1288,9 +1285,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = rcu_dereference_protected(rt6_ex->rt6i->from, - lockdep_is_held(&rt6_exception_lock)); - rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); -- GitLab From 23583f7795025e3c783b680d906509366b0906ad Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 30 Apr 2019 10:52:29 -0500 Subject: [PATCH 1784/1861] ACPI / property: fix handling of data_nodes in acpi_get_next_subnode() When the DSDT tables expose devices with subdevices and a set of hierarchical _DSD properties, the data returned by acpi_get_next_subnode() is incorrect, with the results suggesting a bad pointer assignment. The parser works fine with device_nodes or data_nodes, but not with a combination of the two. The problem is traced to an invalid pointer used when jumping from handling device_nodes to data nodes. The existing code looks for data nodes below the last subdevice found instead of the common root. Fix by forcing the acpi_device pointer to be derived from the same fwnode for the two types of subnodes. This same problem of handling device and data nodes was already fixed in a similar way by 'commit bf4703fdd166 ("ACPI / property: fix data node parsing in acpi_get_next_subnode()")' but broken later by 'commit 34055190b19 ("ACPI / property: Add fwnode_get_next_child_node()")', so this should probably go to linux-stable all the way to 4.12 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 8832d0e13a72d..9d460a859be07 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1032,6 +1032,14 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, const struct acpi_data_node *data = to_acpi_data_node(fwnode); struct acpi_data_node *dn; + /* + * We can have a combination of device and data nodes, e.g. with + * hierarchical _DSD properties. 
Make sure the adev pointer is + * restored before going through data nodes, otherwise we will + * be looking for data_nodes below the last device found instead + * of the common fwnode shared by device_nodes and data_nodes. + */ + adev = to_acpi_device_node(fwnode); if (adev) head = &adev->data.subnodes; else if (data) -- GitLab From 5c8b0b54db22c54f2aec991b388f550d3a927f26 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Apr 2019 10:16:07 -0600 Subject: [PATCH 1785/1861] io_uring: have submission side sqe errors post a cqe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we only post a cqe if we get an error OUTSIDE of submission. For submission, we return the error directly through io_uring_enter(). This is a bit awkward for applications, and it makes more sense to always post a cqe with an error, if the error happens on behalf of an sqe. This changes submission behavior a bit. io_uring_enter() returns -ERROR for an error, and > 0 for number of sqes submitted. Before this change, if you wanted to submit 8 entries and had an error on the 5th entry, io_uring_enter() would return 4 (for number of entries successfully submitted) and rewind the sqring. The application would then have to peek at the sqring and figure out what was wrong with the head sqe, and then skip it itself. With this change, we'll return 5 since we did consume 5 sqes, and the last sqe (with the error) will result in a cqe being posted with the error. This makes the logic easier to handle in the application, and it cleans up the submission part. Suggested-by: Stefan Bühler Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 77b247b5d10b8..0a894d7baceb0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1801,14 +1801,6 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) } } -/* - * Undo last io_get_sqring() - */ -static void io_drop_sqring(struct io_ring_ctx *ctx) -{ - ctx->cached_sq_head--; -} - /* * Fetch an sqe, if one is available. Note that s->sqe will point to memory * that is mapped by userspace. This means that care needs to be taken to @@ -2018,7 +2010,7 @@ static int io_sq_thread(void *data) static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; - int i, ret = 0, submit = 0; + int i, submit = 0; if (to_submit > IO_PLUG_THRESHOLD) { io_submit_state_start(&state, ctx, to_submit); @@ -2027,6 +2019,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) for (i = 0; i < to_submit; i++) { struct sqe_submit s; + int ret; if (!io_get_sqring(ctx, &s)) break; @@ -2034,21 +2027,18 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; + submit++; ret = io_submit_sqe(ctx, &s, statep); - if (ret) { - io_drop_sqring(ctx); - break; - } - - submit++; + if (ret) + io_cqring_add_event(ctx, s.sqe->user_data, ret, 0); } io_commit_sqring(ctx); if (statep) io_submit_state_end(statep); - return submit ? 
submit : ret; + return submit; } static unsigned io_cqring_events(struct io_cq_ring *ring) @@ -2779,24 +2769,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, mutex_lock(&ctx->uring_lock); submitted = io_ring_submit(ctx, to_submit); mutex_unlock(&ctx->uring_lock); - - if (submitted < 0) - goto out_ctx; } if (flags & IORING_ENTER_GETEVENTS) { unsigned nr_events = 0; min_complete = min(min_complete, ctx->cq_entries); - /* - * The application could have included the 'to_submit' count - * in how many events it wanted to wait for. If we failed to - * submit the desired count, we may need to adjust the number - * of events to poll/wait for. - */ - if (submitted < to_submit) - min_complete = min_t(unsigned, submitted, min_complete); - if (ctx->flags & IORING_SETUP_IOPOLL) { mutex_lock(&ctx->uring_lock); ret = io_iopoll_check(ctx, &nr_events, min_complete); -- GitLab From 799381e49b4e7b0177664cdbc54a2de8b41647a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 28 Apr 2019 18:10:39 -0700 Subject: [PATCH 1786/1861] Documentation: fix netdev-FAQ.rst markup warning Fix ReST underline warning: ./Documentation/networking/netdev-FAQ.rst:135: WARNING: Title underline too short. Q: I made changes to only a few patches in a patch series should I resend only those changed? -------------------------------------------------------------------------------------------- Fixes: ffa91253739c ("Documentation: networking: Update netdev-FAQ regarding patches") Signed-off-by: Randy Dunlap Cc: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 8c7a713cf657a..642fa963be3cf 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -132,7 +132,7 @@ version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. Q: I made changes to only a few patches in a patch series should I resend only those changed? --------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------- A: No, please resend the entire patch series and make sure you do number your patches such that it is clear this is the latest and greatest set of patches that can be applied. -- GitLab From 37e9c087c81447cebd4ca022226829e319b0e280 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 29 Apr 2019 07:51:44 +0200 Subject: [PATCH 1787/1861] stmmac: pci: Fix typo in IOT2000 comment Signed-off-by: Jan Kiszka Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index cc1e887e47b50..26db6aa002d19 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -160,7 +160,7 @@ static const struct dmi_system_id quark_pci_dmi[] = { .driver_data = (void *)&galileo_stmmac_dmi_data, }, /* - * There are 2 types of SIMATIC IOT2000: IOT20202 and IOT2040. + * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040. * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which * has only one pci network device while other asset tags are * for IOT2040 which has two. 
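Returning to the io_uring submission-error change two patches up: from userspace, the new contract is simply "reap a cqe and check res". A hedged sketch using liburing (assumed available; error handling elided):

#include <liburing.h>
#include <stdio.h>
#include <sys/uio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	char buf[64];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_readv(sqe, -1, &iov, 1, 0);	/* bogus fd on purpose */
	sqe->user_data = 42;

	/* post-change: the bad sqe still counts as consumed ... */
	printf("submitted=%d\n", io_uring_submit(&ring));

	/* ... and its error arrives as a completion, tagged by user_data */
	io_uring_wait_cqe(&ring, &cqe);
	printf("user_data=%llu res=%d\n",
	       (unsigned long long)cqe->user_data, cqe->res);	/* -EBADF */
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}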
-- GitLab From fbd019737d71e405f86549fd738f81e2ff3dd073 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 29 Apr 2019 14:16:19 +0800 Subject: [PATCH 1788/1861] sctp: avoid running the sctp state machine recursively Ying triggered a call trace when doing an asconf testing: BUG: scheduling while atomic: swapper/12/0/0x10000100 Call Trace: [] dump_stack+0x19/0x1b [] __schedule_bug+0x64/0x72 [] __schedule+0x9ba/0xa00 [] __cond_resched+0x26/0x30 [] _cond_resched+0x3a/0x50 [] kmem_cache_alloc_node+0x38/0x200 [] __alloc_skb+0x5d/0x2d0 [] sctp_packet_transmit+0x610/0xa20 [sctp] [] sctp_outq_flush+0x2ce/0xc00 [sctp] [] sctp_outq_uncork+0x1c/0x20 [sctp] [] sctp_cmd_interpreter.isra.22+0xc8/0x1460 [sctp] [] sctp_do_sm+0xe1/0x350 [sctp] [] sctp_primitive_ASCONF+0x3d/0x50 [sctp] [] sctp_cmd_interpreter.isra.22+0x114/0x1460 [sctp] [] sctp_do_sm+0xe1/0x350 [sctp] [] sctp_assoc_bh_rcv+0xf4/0x1b0 [sctp] [] sctp_inq_push+0x51/0x70 [sctp] [] sctp_rcv+0xa8b/0xbd0 [sctp] As it shows, the first sctp_do_sm() running under atomic context (NET_RX softirq) invoked sctp_primitive_ASCONF() that uses GFP_KERNEL flag later, and this flag is supposed to be used in non-atomic context only. Besides, sctp_do_sm() was called recursively, which is not expected. Vlad tried to fix this recursive call in Commit c0786693404c ("sctp: Fix oops when sending queued ASCONF chunks") by introducing a new command SCTP_CMD_SEND_NEXT_ASCONF. But it didn't work as this command is still used in the first sctp_do_sm() call, and sctp_primitive_ASCONF() will be called in this command again. To avoid calling sctp_do_sm() recursively, we send the next queued ASCONF not by sctp_primitive_ASCONF(), but by sctp_sf_do_prm_asconf() in the 1st sctp_do_sm() directly. Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 - net/sctp/sm_sideeffect.c | 29 ----------------------------- net/sctp/sm_statefuns.c | 35 +++++++++++++++++++++++++++-------- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6640f84fe5368..6d5beac29bc11 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -105,7 +105,6 @@ enum sctp_verb { SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */ SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ - SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1d143bc3f73de..4aa03588f87b9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, } -/* Sent the next ASCONF packet currently stored in the association. - * This happens after the ASCONF_ACK was succeffully processed. - */ -static void sctp_cmd_send_asconf(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - - /* Send the next asconf chunk from the addip chunk - * queue. - */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - struct sctp_chunk *asconf = list_entry(entry, - struct sctp_chunk, list); - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. 
*/ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(net, asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } -} - - /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; - case SCTP_CMD_SEND_NEXT_ASCONF: - sctp_cmd_send_asconf(asoc); - break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c9ae3404b1bb1..713a669d20585 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); +} + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands); abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); -- GitLab From 61cf61d81e326163ce1557ceccfca76e11d0e57c Mon Sep 17 00:00:00 2001 From: Arun KS Date: Tue, 30 Apr 2019 16:05:04 +0530 Subject: [PATCH 1789/1861] arm64: Fix size of __early_cpu_boot_status __early_cpu_boot_status is of type long. Use quad assembler directive to allocate proper size. Acked-by: Mark Rutland Signed-off-by: Arun KS Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f533305849d77..fcae3f85c6cdf 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -684,7 +684,7 @@ ENTRY(__boot_cpu_mode) * with MMU turned off. */ ENTRY(__early_cpu_boot_status) - .long 0 + .quad 0 .popsection -- GitLab From 526e065dbca6df0b5a130b84b836b8b3c9f54e21 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:28 -0500 Subject: [PATCH 1790/1861] arm64: add sysfs vulnerability show for speculative store bypass Return status based on ssbd_state and __ssb_safe. If the mitigation is disabled, or the firmware isn't responding then return the expected machine state based on a whitelist of known good cores. 
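The paragraph that follows spells the policy out in full; as a standalone model first (all names hypothetical, reduced to the decision logic):

#include <stdbool.h>
#include <stdio.h>

enum fw_answer { FW_SAFE, FW_VULNERABLE, FW_NO_RESPONSE };

static bool machine_safe = true;	/* optimistic default */

static void account_cpu(bool on_whitelist, enum fw_answer fw)
{
	if (on_whitelist)
		return;			/* known-good core: no demotion */
	if (fw == FW_SAFE)
		return;			/* firmware vouches for the core */
	machine_safe = false;		/* off-list and unvouched */
}

int main(void)
{
	account_cpu(true, FW_NO_RESPONSE);	/* still safe */
	account_cpu(false, FW_SAFE);		/* still safe */
	account_cpu(false, FW_NO_RESPONSE);	/* demoted */
	printf("machine is %s\n", machine_safe ? "safe" : "vulnerable");
	return 0;
}

Note how the demotion waits for the firmware answer, which is exactly the ordering subtlety the commit message goes on to describe.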
Given a heterogeneous machine, the overall machine vulnerability defaults to safe but is reset to unsafe when we miss the whitelist and the firmware doesn't explicitly tell us the core is safe. In order to make that work we delay transitioning to vulnerable until we know the firmware isn't responding to avoid a case where we miss the whitelist, but the firmware goes ahead and reports the core is not vulnerable. If all the cores in the machine have SSBS, then __ssb_safe will remain true. Tested-by: Stefan Wahren Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b6132783e8cfd..4bb0f7cad4189 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -278,6 +278,7 @@ static int detect_harden_bp_fw(void) DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; +static bool __ssb_safe = true; static const struct ssbd_options { const char *str; @@ -381,6 +382,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, struct arm_smccc_res res; bool required = true; s32 val; + bool this_cpu_safe = false; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -389,8 +391,14 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } + /* delay setting __ssb_safe until we get a firmware response */ + if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) + this_cpu_safe = true; + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -407,6 +415,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -415,14 +425,18 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (val) { case SMCCC_RET_NOT_SUPPORTED: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; + /* machines with mixed mitigation requirements must not return this */ case SMCCC_RET_NOT_REQUIRED: pr_info_once("%s mitigation not required\n", entry->desc); ssbd_state = ARM64_SSBD_MITIGATED; return false; case SMCCC_RET_SUCCESS: + __ssb_safe = false; required = true; break; @@ -432,6 +446,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: WARN_ON(1); + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -472,6 +488,14 @@ out_printmsg: return required; } +/* known invulnerable cores */ +static const struct midr_range arm64_ssb_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + {}, +}; + static void __maybe_unused cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { @@ -767,6 +791,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_SSBD, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, + .midr_range_list = arm64_ssb_cpus, }, #ifdef CONFIG_ARM64_ERRATUM_1188873 { @@ -805,3 +830,20 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if 
(__ssb_safe) + return sprintf(buf, "Not affected\n"); + + switch (ssbd_state) { + case ARM64_SSBD_KERNEL: + case ARM64_SSBD_FORCE_ENABLE: + if (IS_ENABLED(CONFIG_ARM64_SSBD)) + return sprintf(buf, + "Mitigation: Speculative Store Bypass disabled via prctl\n"); + } + + return sprintf(buf, "Vulnerable\n"); +} -- GitLab From 61ae1321f06c4489c724c803e9b8363dea576da3 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Mon, 15 Apr 2019 16:21:29 -0500 Subject: [PATCH 1791/1861] arm64: enable generic CPU vulnerabilites support Enable CPU vulnerabilty show functions for spectre_v1, spectre_v2, meltdown and store-bypass. Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de1..6a7b7d4e0e903 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -90,6 +90,7 @@ config ARM64 select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_MULTI_HANDLER -- GitLab From eb337cdfcd5dd3b10522c2f34140a73a4c285c30 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Apr 2019 16:58:56 +0100 Subject: [PATCH 1792/1861] arm64: ssbs: Don't treat CPUs with SSBS as unaffected by SSB SSBS provides a relatively cheap mitigation for SSB, but it is still a mitigation and its presence does not indicate that the CPU is unaffected by the vulnerability. Tweak the mitigation logic so that we report the correct string in sysfs. Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4bb0f7cad4189..44ef98be001e9 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -386,15 +386,17 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + /* delay setting __ssb_safe until we get a firmware response */ + if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) + this_cpu_safe = true; + if (this_cpu_has_cap(ARM64_SSBS)) { + if (!this_cpu_safe) + __ssb_safe = false; required = false; goto out_printmsg; } - /* delay setting __ssb_safe until we get a firmware response */ - if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) - this_cpu_safe = true; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) -- GitLab From a111b7c0f20e13b54df2fa959b3dc0bdf1925ae6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:32 -0500 Subject: [PATCH 1793/1861] arm64/speculation: Support 'mitigations=' cmdline option Configure arm64 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, and Speculative Store Bypass. The default behavior is unchanged. 
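As a sketch of what the arch-independent aggregation means mechanically: one parsed global, consulted from each mitigation's own setup path. Standalone and hypothetical (the kernel's real parsing lives elsewhere):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool mitigations_off;

static void parse_cmdline(const char *cmdline)
{
	mitigations_off = strstr(cmdline, "mitigations=off") != NULL;
}

/* each mitigation keeps its own override but honors the aggregate */
static bool want_kpti(bool forced_on)
{
	return forced_on || !mitigations_off;
}

static bool want_ssbd(void)
{
	return !mitigations_off;
}

int main(void)
{
	parse_cmdline("console=ttyAMA0 mitigations=off");
	printf("kpti=%d ssbd=%d\n", want_kpti(false), want_ssbd());
	return 0;
}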
Signed-off-by: Josh Poimboeuf [will: reorder checks so KASLR implies KPTI and SSBS is affected by cmdline] Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 8 +++++--- arch/arm64/kernel/cpu_errata.c | 6 +++++- arch/arm64/kernel/cpufeature.c | 8 +++++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6a929258faf71..ce226f7ee5663 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2545,8 +2545,8 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86,PPC,S390] Control optional mitigations for CPU - vulnerabilities. This is a set of curated, + [X86,PPC,S390,ARM64] Control optional mitigations for + CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2555,11 +2555,13 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] + kpti=0 [ARM64] nospectre_v1 [PPC] nobp=0 [S390] - nospectre_v2 [X86,PPC,S390] + nospectre_v2 [X86,PPC,S390,ARM64] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] + ssbd=force-off [ARM64] l1tf=off [X86] auto (default) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 44ef98be001e9..1b9ce0fdd81de 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -386,6 +387,9 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (cpu_mitigations_off()) + ssbd_state = ARM64_SSBD_FORCE_DISABLE; + /* delay setting __ssb_safe until we get a firmware response */ if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) this_cpu_safe = true; @@ -589,7 +593,7 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) } /* forced off */ - if (__nospectre_v2) { + if (__nospectre_v2 || cpu_mitigations_off()) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); __hardenbp_enab = false; return false; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 703ee8564fbda..f3b32d88f1654 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -966,7 +967,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), { /* sentinel */ } }; - char const *str = "command line option"; + char const *str = "kpti command line option"; bool meltdown_safe; meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); @@ -996,6 +997,11 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } } + if (cpu_mitigations_off() && !__kpti_forced) { + str = "mitigations=off"; + __kpti_forced = -1; + } + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { pr_info_once("kernel page table isolation disabled by kernel configuration\n"); return false; -- GitLab From 4ad499c94264a2ee05aacc518b9bde658318e510 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 22:56:21 -0500 Subject: [PATCH 1794/1861] Documentation: Add ARM64 to kernel-parameters.rst Add ARM64 to the legend of architectures. 
It's already used in several places in kernel-parameters.txt. Suggested-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b8d0bc07ed0a6..0124980dca2db 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -88,6 +88,7 @@ parameter is applicable:: APIC APIC support is enabled. APM Advanced Power Management support is enabled. ARM ARM architecture is enabled. + ARM64 ARM64 architecture is enabled. AX25 Appropriate AX.25 support is enabled. CLK Common clock infrastructure is enabled. CMA Contiguous Memory Area support is enabled. -- GitLab From 975554b03eddc1df73bda3a764a09e18cadd5f1c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 Apr 2019 13:34:51 +0100 Subject: [PATCH 1795/1861] io_uring: fix SQPOLL cpu validation In io_sq_offload_start(), we call cpu_possible() on an unbounded cpu value from userspace. On v5.1-rc7 on arm64 with CONFIG_DEBUG_PER_CPU_MAPS, this results in a splat: WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpu_max_bits_warn include/linux/cpumask.h:121 [inline] There was an attempt to fix this in commit: 917257daa0fea7a0 ("io_uring: only test SQPOLL cpu after we've verified it") ... by adding a check after the cpu value had been limited to NR_CPU_IDS using array_index_nospec(). However, this left an unbound check at the start of the function, for which the warning still fires. Let's fix this correctly by checking that the cpu value is bound by nr_cpu_ids before passing it to cpu_possible(). Note that only nr_cpu_ids of a cpumask are guaranteed to exist at runtime, and nr_cpu_ids can be significantly smaller than NR_CPUs. For example, an arm64 defconfig has NR_CPUS=256, while my test VM has 4 vCPUs. Following the intent from the commit message for 917257daa0fea7a0, the check is moved under the SQ_AFF branch, which is the only branch where the cpu values is consumed. The check is performed before bounding the value with array_index_nospec() so that we don't silently accept bogus cpu values from userspace, where array_index_nospec() would force these values to 0. I suspect we can remove the array_index_nospec() call entirely, but I've conservatively left that in place, updated to use nr_cpu_ids to match the prior check. Tested on arm64 with the Syzkaller reproducer: https://syzkaller.appspot.com/bug?extid=cd714a07c6de2bc34293 https://syzkaller.appspot.com/x/repro.syz?x=15d8b397200000 Full splat from before this patch: WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpu_max_bits_warn include/linux/cpumask.h:121 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpumask_check include/linux/cpumask.h:128 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 cpumask_test_cpu include/linux/cpumask.h:344 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 io_sq_offload_start fs/io_uring.c:2244 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 io_uring_create fs/io_uring.c:2864 [inline] WARNING: CPU: 1 PID: 27601 at include/linux/cpumask.h:121 io_uring_setup+0x1108/0x15a0 fs/io_uring.c:2916 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 1 PID: 27601 Comm: syz-executor.0 Not tainted 5.1.0-rc7 #3 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2f0 include/linux/compiler.h:193 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:158 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x110/0x190 lib/dump_stack.c:113 panic+0x384/0x68c kernel/panic.c:214 __warn+0x2bc/0x2c0 kernel/panic.c:571 report_bug+0x228/0x2d8 lib/bug.c:186 bug_handler+0xa0/0x1a0 arch/arm64/kernel/traps.c:956 call_break_hook arch/arm64/kernel/debug-monitors.c:301 [inline] brk_handler+0x1d4/0x388 arch/arm64/kernel/debug-monitors.c:316 do_debug_exception+0x1a0/0x468 arch/arm64/mm/fault.c:831 el1_dbg+0x18/0x8c cpu_max_bits_warn include/linux/cpumask.h:121 [inline] cpumask_check include/linux/cpumask.h:128 [inline] cpumask_test_cpu include/linux/cpumask.h:344 [inline] io_sq_offload_start fs/io_uring.c:2244 [inline] io_uring_create fs/io_uring.c:2864 [inline] io_uring_setup+0x1108/0x15a0 fs/io_uring.c:2916 __do_sys_io_uring_setup fs/io_uring.c:2929 [inline] __se_sys_io_uring_setup fs/io_uring.c:2926 [inline] __arm64_sys_io_uring_setup+0x50/0x70 fs/io_uring.c:2926 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common.constprop.0+0x148/0x2e0 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0xdc/0x100 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:948 SMP: stopping secondary CPUs Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled CPU features: 0x002,23000438 Memory Limit: none Rebooting in 1 seconds.. Fixes: 917257daa0fea7a0 ("io_uring: only test SQPOLL cpu after we've verified it") Signed-off-by: Mark Rutland Cc: Jens Axboe Cc: Alexander Viro Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Simplied the logic Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0a894d7baceb0..5954047ee96d9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2319,10 +2319,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, mmgrab(current->mm); ctx->sqo_mm = current->mm; - ret = -EINVAL; - if (!cpu_possible(p->sq_thread_cpu)) - goto err; - if (ctx->flags & IORING_SETUP_SQPOLL) { ret = -EPERM; if (!capable(CAP_SYS_ADMIN)) @@ -2333,11 +2329,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, ctx->sq_thread_idle = HZ; if (p->flags & IORING_SETUP_SQ_AFF) { - int cpu; + int cpu = array_index_nospec(p->sq_thread_cpu, + nr_cpu_ids); - cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS); ret = -EINVAL; - if (!cpu_possible(p->sq_thread_cpu)) + if (!cpu_possible(cpu)) goto err; ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread, -- GitLab From 52e04ef4c9d459cba3afd86ec335a411b40b7fd2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 Apr 2019 17:30:21 +0100 Subject: [PATCH 1796/1861] io_uring: free allocated io_memory once If io_allocate_scq_urings() fails to allocate an sq_* region, it will call io_mem_free() for any previously allocated regions, but leave dangling pointers to these regions in the ctx. Any regions which have not yet been allocated are left NULL. Note that when returning -EOVERFLOW, the previously allocated sq_ring is not freed, which appears to be an unintentional leak. When io_allocate_scq_urings() fails, io_uring_create() will call io_ring_ctx_wait_and_kill(), which calls io_mem_free() on all the sq_* regions, assuming the pointers are valid and not NULL. 
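As the rest of the message explains, the remedy is a NULL-tolerant free plus a single owner of teardown. In standalone form, with the kernel specifics stripped away:

#include <stdlib.h>

struct ctx {
	void *sq_ring, *sq_sqes, *cq_ring;
};

static void mem_free(void *p)
{
	if (!p)
		return;		/* tolerate regions that were never allocated */
	free(p);
}

static void ctx_free(struct ctx *c)
{
	/* the one place that frees: no error path frees on its own */
	mem_free(c->sq_ring);
	mem_free(c->sq_sqes);
	mem_free(c->cq_ring);
}

static int alloc_rings(struct ctx *c)
{
	c->sq_ring = malloc(64);
	if (!c->sq_ring)
		return -1;
	c->sq_sqes = malloc(64);
	if (!c->sq_sqes)
		return -1;	/* partial state is fine: ctx_free() handles it */
	c->cq_ring = malloc(64);
	return c->cq_ring ? 0 : -1;
}

int main(void)
{
	struct ctx c = { 0 };

	(void)alloc_rings(&c);	/* success or partial failure: either is fine */
	ctx_free(&c);		/* frees whatever exists, skips the NULLs */
	return 0;
}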
This can result in pages being freed multiple times, which has been observed to corrupt the page state, leading to subsequent fun. This can also result in virt_to_page() on NULL, resulting in the use of bogus page addresses, and yet more subsequent fun. The latter can be detected with CONFIG_DEBUG_VIRTUAL on arm64. Adding a cleanup path to io_allocate_scq_urings() complicates the logic, so let's leave it to io_ring_ctx_free() to consistently free these pointers, and simplify the io_allocate_scq_urings() error paths. Full splats from before this patch below. Note that the pointer logged by the DEBUG_VIRTUAL "non-linear address" warning has been hashed, and is actually NULL. [ 26.098129] page:ffff80000e949a00 count:0 mapcount:-128 mapping:0000000000000000 index:0x0 [ 26.102976] flags: 0x63fffc000000() [ 26.104373] raw: 000063fffc000000 ffff80000e86c188 ffff80000ea3df08 0000000000000000 [ 26.108917] raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000 [ 26.137235] page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) [ 26.143960] ------------[ cut here ]------------ [ 26.146020] kernel BUG at include/linux/mm.h:547! [ 26.147586] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 26.149163] Modules linked in: [ 26.150287] Process syz-executor.21 (pid: 20204, stack limit = 0x000000000e9cefeb) [ 26.153307] CPU: 2 PID: 20204 Comm: syz-executor.21 Not tainted 5.1.0-rc7-00004-g7d30b2ea43d6 #18 [ 26.156566] Hardware name: linux,dummy-virt (DT) [ 26.158089] pstate: 40400005 (nZcv daif +PAN -UAO) [ 26.159869] pc : io_mem_free+0x9c/0xa8 [ 26.161436] lr : io_mem_free+0x9c/0xa8 [ 26.162720] sp : ffff000013003d60 [ 26.164048] x29: ffff000013003d60 x28: ffff800025048040 [ 26.165804] x27: 0000000000000000 x26: ffff800025048040 [ 26.167352] x25: 00000000000000c0 x24: ffff0000112c2820 [ 26.169682] x23: 0000000000000000 x22: 0000000020000080 [ 26.171899] x21: ffff80002143b418 x20: ffff80002143b400 [ 26.174236] x19: ffff80002143b280 x18: 0000000000000000 [ 26.176607] x17: 0000000000000000 x16: 0000000000000000 [ 26.178997] x15: 0000000000000000 x14: 0000000000000000 [ 26.181508] x13: 00009178a5e077b2 x12: 0000000000000001 [ 26.183863] x11: 0000000000000000 x10: 0000000000000980 [ 26.186437] x9 : ffff000013003a80 x8 : ffff800025048a20 [ 26.189006] x7 : ffff8000250481c0 x6 : ffff80002ffe9118 [ 26.191359] x5 : ffff80002ffe9118 x4 : 0000000000000000 [ 26.193863] x3 : ffff80002ffefe98 x2 : 44c06ddd107d1f00 [ 26.196642] x1 : 0000000000000000 x0 : 000000000000003e [ 26.198892] Call trace: [ 26.199893] io_mem_free+0x9c/0xa8 [ 26.201155] io_ring_ctx_wait_and_kill+0xec/0x180 [ 26.202688] io_uring_setup+0x6c4/0x6f0 [ 26.204091] __arm64_sys_io_uring_setup+0x18/0x20 [ 26.205576] el0_svc_common.constprop.0+0x7c/0xe8 [ 26.207186] el0_svc_handler+0x28/0x78 [ 26.208389] el0_svc+0x8/0xc [ 26.209408] Code: aa0203e0 d0006861 9133a021 97fcdc3c (d4210000) [ 26.211995] ---[ end trace bdb81cd43a21e50d ]--- [ 81.770626] ------------[ cut here ]------------ [ 81.825015] virt_to_phys used for non-linear address: 000000000d42f2c7 ( (null)) [ 81.827860] WARNING: CPU: 1 PID: 30171 at arch/arm64/mm/physaddr.c:15 __virt_to_phys+0x48/0x68 [ 81.831202] Modules linked in: [ 81.832212] CPU: 1 PID: 30171 Comm: syz-executor.20 Not tainted 5.1.0-rc7-00004-g7d30b2ea43d6 #19 [ 81.835616] Hardware name: linux,dummy-virt (DT) [ 81.836863] pstate: 60400005 (nZCv daif +PAN -UAO) [ 81.838727] pc : __virt_to_phys+0x48/0x68 [ 81.840572] lr : __virt_to_phys+0x48/0x68 [ 81.842264] sp : ffff80002cf67c70 [ 81.843858] x29: 
ffff80002cf67c70 x28: ffff800014358e18 [ 81.846463] x27: 0000000000000000 x26: 0000000020000080 [ 81.849148] x25: 0000000000000000 x24: ffff80001bb01f40 [ 81.851986] x23: ffff200011db06c8 x22: ffff2000127e3c60 [ 81.854351] x21: ffff800014358cc0 x20: ffff800014358d98 [ 81.856711] x19: 0000000000000000 x18: 0000000000000000 [ 81.859132] x17: 0000000000000000 x16: 0000000000000000 [ 81.861586] x15: 0000000000000000 x14: 0000000000000000 [ 81.863905] x13: 0000000000000000 x12: ffff1000037603e9 [ 81.866226] x11: 1ffff000037603e8 x10: 0000000000000980 [ 81.868776] x9 : ffff80002cf67840 x8 : ffff80001bb02920 [ 81.873272] x7 : ffff1000037603e9 x6 : ffff80001bb01f47 [ 81.875266] x5 : ffff1000037603e9 x4 : dfff200000000000 [ 81.876875] x3 : ffff200010087528 x2 : ffff1000059ecf58 [ 81.878751] x1 : 44c06ddd107d1f00 x0 : 0000000000000000 [ 81.880453] Call trace: [ 81.881164] __virt_to_phys+0x48/0x68 [ 81.882919] io_mem_free+0x18/0x110 [ 81.886585] io_ring_ctx_wait_and_kill+0x13c/0x1f0 [ 81.891212] io_uring_setup+0xa60/0xad0 [ 81.892881] __arm64_sys_io_uring_setup+0x2c/0x38 [ 81.894398] el0_svc_common.constprop.0+0xac/0x150 [ 81.896306] el0_svc_handler+0x34/0x88 [ 81.897744] el0_svc+0x8/0xc [ 81.898715] ---[ end trace b4a703802243cbba ]--- Fixes: 2b188cc1bb857a9d ("Add io_uring IO interface") Signed-off-by: Mark Rutland Cc: Jens Axboe Cc: Alexander Viro Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5954047ee96d9..046fc4e1e1552 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2396,8 +2396,12 @@ static int io_account_mem(struct user_struct *user, unsigned long nr_pages) static void io_mem_free(void *ptr) { - struct page *page = virt_to_head_page(ptr); + struct page *page; + + if (!ptr) + return; + page = virt_to_head_page(ptr); if (put_page_testzero(page)) free_compound_page(page); } @@ -2816,17 +2820,12 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx, return -EOVERFLOW; ctx->sq_sqes = io_mem_alloc(size); - if (!ctx->sq_sqes) { - io_mem_free(ctx->sq_ring); + if (!ctx->sq_sqes) return -ENOMEM; - } cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries)); - if (!cq_ring) { - io_mem_free(ctx->sq_ring); - io_mem_free(ctx->sq_sqes); + if (!cq_ring) return -ENOMEM; - } ctx->cq_ring = cq_ring; cq_ring->ring_mask = p->cq_entries - 1; -- GitLab From 817869d2519f0cb7be5b3482129dadc806dfb747 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Apr 2019 14:44:05 -0600 Subject: [PATCH 1797/1861] io_uring: drop req submit reference always in async punt If we don't end up actually calling submit in io_sq_wq_submit_work(), we still need to drop the submit reference to the request. If we don't, then we can leak the request. This can happen if we race with ring shutdown while flushing the workqueue for requests that require use of the mm_struct. 
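The rule being enforced reduces to: the path that takes a reference must drop it on every exit. A standalone refcount sketch (simplified types, not the io_uring ones):

#include <stdio.h>

struct req {
	int refs;
};

static void req_put(struct req *r)
{
	if (--r->refs == 0)
		printf("request freed\n");
}

static void submit_work(struct req *r, int can_submit)
{
	if (can_submit) {
		/* ... perform the submission ... */
	}
	/* unconditional: the submit reference is dropped on all paths,
	 * including the shutdown race where nothing was submitted */
	req_put(r);
}

int main(void)
{
	struct req r = { .refs = 1 };

	submit_work(&r, 0);	/* raced with ring shutdown: still no leak */
	return 0;
}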
Fixes: e65ef56db494 ("io_uring: use regular request ref counts") Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 046fc4e1e1552..18cecb6a01518 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1568,10 +1568,11 @@ restart: break; cond_resched(); } while (1); - - /* drop submission reference */ - io_put_req(req); } + + /* drop submission reference */ + io_put_req(req); + if (ret) { io_cqring_add_event(ctx, sqe->user_data, ret, 0); io_put_req(req); -- GitLab From 60a27b906d1a372474669c914c10d6c993858a4a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Apr 2019 18:45:20 +0800 Subject: [PATCH 1798/1861] block: fix handling for BIO_NO_PAGE_REF Commit 399254aaf489211 ("block: add BIO_NO_PAGE_REF flag") introduces BIO_NO_PAGE_REF, and once this flag is set for one bio, all pages in the bio won't be get/put during IO. However, if one bio is submitted via __blkdev_direct_IO_simple(), even though BIO_NO_PAGE_REF is set, pages still may be put. Fixes this issue by avoiding to put pages if BIO_NO_PAGE_REF is set. Fixes: 399254aaf489211 ("block: add BIO_NO_PAGE_REF flag") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 24615c76c1d0e..bb28e2ead679c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -264,7 +264,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio_for_each_segment_all(bvec, &bio, i, iter_all) { if (should_dirty && !PageCompound(bvec->bv_page)) set_page_dirty_lock(bvec->bv_page); - put_page(bvec->bv_page); + if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) + put_page(bvec->bv_page); } if (unlikely(bio.bi_status)) -- GitLab From f5eb4d3b92a6a1096ef3480b54782a9409281300 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Apr 2019 18:45:21 +0800 Subject: [PATCH 1799/1861] iov_iter: fix iov_iter_type Commit 875f1d0769cd ("iov_iter: add ITER_BVEC_FLAG_NO_REF flag") introduces one extra flag of ITER_BVEC_FLAG_NO_REF, and this flag is stored into iter->type. However, iov_iter_type() doesn't consider the new added flag, fix it by masking this flag in iov_iter_type(). Fixes: 875f1d0769cd ("iov_iter: add ITER_BVEC_FLAG_NO_REF flag") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/uio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index f184af1999a8e..2d0131ad46041 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -60,7 +60,7 @@ struct iov_iter { static inline enum iter_type iov_iter_type(const struct iov_iter *i) { - return i->type & ~(READ | WRITE); + return i->type & ~(READ | WRITE | ITER_BVEC_FLAG_NO_REF); } static inline bool iter_is_iovec(const struct iov_iter *i) -- GitLab From b2cf86e1563e33a14a1c69b3e508d15dc12f804c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:46:55 -0400 Subject: [PATCH 1800/1861] packet: in recvmsg msg_name return at least sizeof sockaddr_ll Packet send checks that msg_name is at least sizeof sockaddr_ll. Packet recv must return at least this length, so that its output can be passed unmodified to packet send. This ceased to be true since adding support for lladdr longer than sll_addr. Since, the return value uses true address length. Always return at least sizeof sockaddr_ll, even if address length is shorter. Zero the padding bytes. 
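From userspace the invariant reads: whatever recvmsg() writes to msg_name on an AF_PACKET socket must be reusable, unmodified, as sendmsg()'s msg_name. A hedged sketch of that round trip (error handling elided, fd assumed to be a bound packet socket):

#include <linux/if_packet.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void echo_one(int fd)
{
	struct sockaddr_ll addr;
	char buf[2048];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = {
		.msg_name = &addr,
		.msg_namelen = sizeof(addr),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	ssize_t len;

	len = recvmsg(fd, &msg, 0);
	if (len < 0)
		return;
	/* with the fix, msg.msg_namelen >= sizeof(addr) and the unused
	 * tail of sll_addr is zeroed, so this passes send's length check */
	iov.iov_len = len;
	sendmsg(fd, &msg, 0);
}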
Change v1->v2: do not overwrite zeroed padding again. use copy_len. Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9419c5cf4de5e..7d361cd53ad5c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3344,20 +3344,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { -- GitLab From 486efdc8f6ce802b27e15921d2353cc740c55451 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 29 Apr 2019 11:53:18 -0400 Subject: [PATCH 1801/1861] packet: validate msg_namelen in send directly Packet sockets in datagram mode take a destination address. Verify its length before passing to dev_hard_header. Prior to 2.6.14-rc3, the send code ignored sll_halen. This is established behavior. Directly compare msg_namelen to dev->addr_len. Change v1->v2: initialize addr in all paths Fixes: 6b8d95f1795c4 ("packet: validate address length if non-zero") Suggested-by: David Laight Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7d361cd53ad5c..9b81813dd16af 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2602,8 +2602,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; @@ -2614,7 +2614,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2624,10 +2623,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? 
saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2799,7 +2801,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2816,7 +2818,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2824,10 +2825,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; -- GitLab From 15d55bae4e3c43cd9f87fd93c73a263e172d34e1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 29 Apr 2019 10:30:09 -0700 Subject: [PATCH 1802/1861] selftests: fib_rule_tests: Fix icmp proto with ipv6 A recent commit returns an error if icmp is used as the ip-proto for IPv6 fib rules. Update fib_rule_tests to send ipv6-icmp instead of icmp. Fixes: 5e1a99eae8499 ("ipv4: Add ICMPv6 support when parse route ipproto") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_rule_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index d4cfb6a7a086d..5b92c1f4dc049 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -147,8 +147,8 @@ fib_rule6_test() fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then - match="ipproto icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + match="ipproto ipv6-icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" fi } -- GitLab From 2dcb003314032c6efb13a065ffae60d164b2dd35 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2019 12:19:12 -0700 Subject: [PATCH 1803/1861] net/tls: avoid NULL pointer deref on nskb->sk in fallback update_chksum() accesses nskb->sk before it has been set by complete_skb(), move the init up. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/tls/tls_device_fallback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index a3ebd4b02714c..c3a5fe624b4e3 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -201,13 +201,14 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) skb_put(nskb, skb->len); memcpy(nskb->data, skb->data, headln); - update_chksum(nskb, headln); nskb->destructor = skb->destructor; nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; + update_chksum(nskb, headln); + delta = nskb->truesize - skb->truesize; if (likely(delta < 0)) WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); -- GitLab From d4ef647510b1200fe1c996ff1cbf5ac47eb930cc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 1 May 2019 16:59:16 +0100 Subject: [PATCH 1804/1861] io_uring: avoid page allocation warnings In io_sqe_buffer_register() we allocate a number of arrays based on the iov_len from the user-provided iov. While we limit iov_len to SZ_1G, we can still attempt to allocate arrays exceeding MAX_ORDER. On a 64-bit system with 4KiB pages, for an iov where iov_base = 0x10 and iov_len = SZ_1G, we'll calculate that nr_pages = 262145. Allocating a corresponding array of (16-byte) bio_vecs then requires 4194320 bytes, which is greater than 4 MiB. This results in SLUB warning that we're trying to allocate more than MAX_ORDER, and failing the allocation. Avoid this by using kvmalloc() for allocations dependent on the user-provided iov_len. At the same time, fix a leak of imu->bvec when registration fails. Full splat from before this patch: WARNING: CPU: 1 PID: 2314 at mm/page_alloc.c:4595 __alloc_pages_nodemask+0x7ac/0x2938 mm/page_alloc.c:4595 Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 2314 Comm: syz-executor326 Not tainted 5.1.0-rc7-dirty #4 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2f0 include/linux/compiler.h:193 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:158 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x110/0x190 lib/dump_stack.c:113 panic+0x384/0x68c kernel/panic.c:214 __warn+0x2bc/0x2c0 kernel/panic.c:571 report_bug+0x228/0x2d8 lib/bug.c:186 bug_handler+0xa0/0x1a0 arch/arm64/kernel/traps.c:956 call_break_hook arch/arm64/kernel/debug-monitors.c:301 [inline] brk_handler+0x1d4/0x388 arch/arm64/kernel/debug-monitors.c:316 do_debug_exception+0x1a0/0x468 arch/arm64/mm/fault.c:831 el1_dbg+0x18/0x8c __alloc_pages_nodemask+0x7ac/0x2938 mm/page_alloc.c:4595 alloc_pages_current+0x164/0x278 mm/mempolicy.c:2132 alloc_pages include/linux/gfp.h:509 [inline] kmalloc_order+0x20/0x50 mm/slab_common.c:1231 kmalloc_order_trace+0x30/0x2b0 mm/slab_common.c:1243 kmalloc_large include/linux/slab.h:480 [inline] __kmalloc+0x3dc/0x4f0 mm/slub.c:3791 kmalloc_array include/linux/slab.h:670 [inline] io_sqe_buffer_register fs/io_uring.c:2472 [inline] __io_uring_register fs/io_uring.c:2962 [inline] __do_sys_io_uring_register fs/io_uring.c:3008 [inline] __se_sys_io_uring_register fs/io_uring.c:2990 [inline] __arm64_sys_io_uring_register+0x9e0/0x1bc8 fs/io_uring.c:2990 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall arch/arm64/kernel/syscall.c:47 [inline] el0_svc_common.constprop.0+0x148/0x2e0 arch/arm64/kernel/syscall.c:83 el0_svc_handler+0xdc/0x100 arch/arm64/kernel/syscall.c:129 el0_svc+0x8/0xc arch/arm64/kernel/entry.S:948 SMP: stopping secondary CPUs Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled CPU features: 0x002,23000438 Memory Limit: none Rebooting in 1 seconds.. 
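For reference, a minimal sketch of the allocation pattern adopted here (hypothetical code, not taken from the patch; the real change sizes three separate arrays this way):

#include <linux/bvec.h>
#include <linux/mm.h>

/* kvmalloc_array() tries a physically contiguous kmalloc first and falls
 * back to vmalloc when nr_pages, derived from the user-supplied iov_len,
 * would push the request past MAX_ORDER. */
static struct bio_vec *alloc_user_bvecs(unsigned long nr_pages)
{
	/* must be released with kvfree(), never plain kfree() */
	return kvmalloc_array(nr_pages, sizeof(struct bio_vec), GFP_KERNEL);
}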
Fixes: edafccee56ff3167 ("io_uring: add support for pre-mapped user IO buffers") Signed-off-by: Mark Rutland Cc: Alexander Viro Cc: Jens Axboe Cc: linux-fsdevel@vger.kernel.org Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 18cecb6a01518..84efb8956734f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2443,7 +2443,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx) if (ctx->account_mem) io_unaccount_mem(ctx->user, imu->nr_bvecs); - kfree(imu->bvec); + kvfree(imu->bvec); imu->nr_bvecs = 0; } @@ -2535,9 +2535,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, if (!pages || nr_pages > got_pages) { kfree(vmas); kfree(pages); - pages = kmalloc_array(nr_pages, sizeof(struct page *), + pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); - vmas = kmalloc_array(nr_pages, + vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), GFP_KERNEL); if (!pages || !vmas) { @@ -2549,7 +2549,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, got_pages = nr_pages; } - imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec), + imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec), GFP_KERNEL); ret = -ENOMEM; if (!imu->bvec) { @@ -2588,6 +2588,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, } if (ctx->account_mem) io_unaccount_mem(ctx->user, nr_pages); + kvfree(imu->bvec); goto err; } @@ -2610,12 +2611,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ctx->nr_user_bufs++; } - kfree(pages); - kfree(vmas); + kvfree(pages); + kvfree(vmas); return 0; err: - kfree(pages); - kfree(vmas); + kvfree(pages); + kvfree(vmas); io_sqe_buffer_unregister(ctx); return ret; } -- GitLab From d2f0c961148f65bc73eda72b9fa3a4e80973cb49 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 29 Apr 2019 16:39:30 +0300 Subject: [PATCH 1805/1861] ipv4: ip_do_fragment: Preserve skb_iif during fragmentation Previously, during fragmentation after forwarding, skb->skb_iif isn't preserved, i.e. 'ip_copy_metadata' does not copy skb_iif from given 'from' skb. As a result, ip_do_fragment's creates fragments with zero skb_iif, leading to inconsistent behavior. Assume for example an eBPF program attached at tc egress (post forwarding) that examines __sk_buff->ingress_ifindex: - the correct iif is observed if forwarding path does not involve fragmentation/refragmentation - a bogus iif is observed if forwarding path involves fragmentation/refragmentatiom Fix, by preserving skb_iif during 'ip_copy_metadata'. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c80188875f392..e8bb2e85c5a47 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -519,6 +519,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; -- GitLab From 6f303d60534c46aa1a239f29c321f95c83dda748 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 11:05:41 -0700 Subject: [PATCH 1806/1861] gcc-9: silence 'address-of-packed-member' warning We already did this for clang, but now gcc has that warning too. Yes, yes, the address may be unaligned. And that's kind of the point. Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2b99679148dc7..633d1196bf005 100644 --- a/Makefile +++ b/Makefile @@ -678,6 +678,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os @@ -719,7 +720,6 @@ ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) # Quiet clang warning: comparison of unsigned expression < 0 is always false KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -- GitLab From cf676908846a06443fa5e6724ca3f5dd7460eca1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 11:07:40 -0700 Subject: [PATCH 1807/1861] gcc-9: don't warn about uninitialized variable I'm not sure what made gcc warn about this code now. The 'ret' variable does end up initialized in all cases, but it's definitely not obvious, so the compiler is quite reasonable to warn about this. So just add initialization to make it all much more obvious both to compilers and to humans. Signed-off-by: Linus Torvalds --- drivers/i2c/i2c-core-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 38af18645133c..c480ca385ffb7 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -185,7 +185,7 @@ static int i2c_generic_bus_free(struct i2c_adapter *adap) int i2c_generic_scl_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; - int i = 0, scl = 1, ret; + int i = 0, scl = 1, ret = 0; if (bri->prepare_recovery) bri->prepare_recovery(adap); -- GitLab From 459e3a21535ae3c7a9a123650e54f5c882b8fcbf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 11:20:53 -0700 Subject: [PATCH 1808/1861] gcc-9: properly declare the {pv,hv}clock_page storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pvlock_page and hvclock_page variables are (as the name implies) addresses to pages, created by the linker script. 
But we declared them as just "extern u8" variables, which _works_, but now that gcc does some more bounds checking, it causes warnings like warning: array subscript 1 is outside array bounds of ‘u8[1]’ when we then access more than one byte from those variables. Fix this by simply making the declaration of the variables match reality, which makes the compiler happy too. Signed-off-by: Linus Torvalds --- arch/x86/entry/vdso/vclock_gettime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 007b3fe9d727c..98c7d12b945c2 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); #ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page +extern u8 pvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); #endif #ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page +extern u8 hvclock_page[PAGE_SIZE] __attribute__((visibility("hidden"))); #endif -- GitLab From f68d7c44e76532e46f292ad941aa3706cb9e6e40 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 30 Apr 2019 10:46:10 +0800 Subject: [PATCH 1809/1861] selftests: fib_rule_tests: print the result and return 1 if any tests failed Fixes: 65b2b4939a64 ("selftests: net: initial fib rule tests") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_rule_tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 5b92c1f4dc049..4b7e107865bf3 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -27,6 +27,7 @@ log_test() nsuccess=$((nsuccess+1)) printf "\n TEST: %-50s [ OK ]\n" "${msg}" else + ret=1 nfail=$((nfail+1)) printf "\n TEST: %-50s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then @@ -245,4 +246,9 @@ setup run_fibrule_tests cleanup +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + exit $ret -- GitLab From 7e74e235bb31a1fefc28d5303da0718b88627ea8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 May 2019 12:19:20 -0700 Subject: [PATCH 1810/1861] gcc-9: don't warn about uninitialized btrfs extent_type variable The 'extent_type' variable does seem to be reliably initialized, but it's _very_ non-obvious, since there's a "goto next" case that jumps over the normal initialization. That will then always trigger the "start >= extent_end" test, which will end up never falling through to the use of that variable. But the code is certainly not obvious, and the compiler warning looks reasonable. Make 'extent_type' an int, and initialize it to an invalid negative value, which seems to be the common pattern in other places. 
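The underlying idiom, as a standalone hypothetical fragment (not from the patch): a negative sentinel is only representable in a signed type, which is why the u8 becomes an int.

#include <linux/errno.h>

/* With a u8, the "extent_type < 0" test below would be always-false and
 * the not-found path would silently reuse a garbage value. */
static int classify(int have_item, int item_type)
{
	int extent_type = -1;	/* invalid until an item is found */

	if (have_item)
		extent_type = item_type;
	if (extent_type < 0)	/* covers the path that skips the lookup */
		return -ENOENT;
	return extent_type;
}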
Signed-off-by: Linus Torvalds --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82fdda8ff5ab8..2973608824eca 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6783,7 +6783,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = btrfs_ino(inode); - u8 extent_type; + int extent_type = -1; struct btrfs_path *path = NULL; struct btrfs_root *root = inode->root; struct btrfs_file_extent_item *item; -- GitLab From 15d2aba7c602cd9005b20ff011b670547b3882c4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Apr 2019 16:43:30 -0600 Subject: [PATCH 1811/1861] PCI/portdrv: Use shared MSI/MSI-X vector for Bandwidth Management The Interrupt Message Number in the PCIe Capabilities register (PCIe r4.0, sec 7.5.3.2) indicates which MSI/MSI-X vector is shared by interrupts related to the PCIe Capability, including Link Bandwidth Management and Link Autonomous Bandwidth Interrupts (Link Control, 7.5.3.7), Command Completed and Hot-Plug Interrupts (Slot Control, 7.5.3.10), and the PME Interrupt (Root Control, 7.5.3.12). pcie_message_numbers() checked whether we want to enable PME or Hot-Plug interrupts but neglected to check for Link Bandwidth Management, so if we only wanted the Bandwidth Management interrupts, it decided we didn't need any vectors at all. Then pcie_port_enable_irq_vec() tried to reallocate zero vectors, which failed, resulting in fallback to INTx. On some systems, e.g., an X79-based workstation, that INTx seems broken or not handled correctly, so we got spurious IRQ16 interrupts for Bandwidth Management events. Change pcie_message_numbers() so that if we want Link Bandwidth Management interrupts, we use the shared MSI/MSI-X vector from the PCIe Capabilities register. Fixes: e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") Link: https://lore.kernel.org/lkml/155597243666.19387.1205950870601742062.stgit@gimli.home Signed-off-by: Alex Williamson [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 7d04f9d087a62..1b330129089fe 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -55,7 +55,8 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, * 7.8.2, 7.10.10, 7.31.2. */ - if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) { + if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | + PCIE_PORT_SERVICE_BWNOTIF)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, &reg16); *pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9; nvec = *pme + 1; -- GitLab From f3505745c07ff50c22aeca9dde98762d1c8b5898 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Tue, 30 Apr 2019 05:12:57 +0200 Subject: [PATCH 1812/1861] rds: ib: force endianness annotation While the endianness is handled correctly, as indicated by the comment above the offending line, sparse was unhappy about the missing annotation, since be64_to_cpu() expects a __be64 argument. To satisfy this annotation, all involved variables are changed to a consistent __le64 and the conversion to uint64_t is delayed to the call to rds_cong_map_updated(). Signed-off-by: Nicholas Mc Guire Acked-by: Santosh Shilimkar Signed-off-by: David S.
Miller --- net/rds/ib_recv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 70559854837ee..8946c89d73923 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -772,7 +772,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, unsigned long frag_off; unsigned long to_copy; unsigned long copied; - uint64_t uncongested = 0; + __le64 uncongested = 0; void *addr; /* catch completely corrupt packets */ @@ -789,7 +789,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, copied = 0; while (copied < RDS_CONG_MAP_BYTES) { - uint64_t *src, *dst; + __le64 *src, *dst; unsigned int k; to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); @@ -824,9 +824,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, } /* the congestion map is in little endian order */ - uncongested = le64_to_cpu(uncongested); - - rds_cong_map_updated(map, uncongested); + rds_cong_map_updated(map, le64_to_cpu(uncongested)); } static void rds_ib_process_recv(struct rds_connection *conn, -- GitLab From 886b7a50100a50f1cbd08a6f8ec5884dfbe082dc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 30 Apr 2019 10:45:12 -0700 Subject: [PATCH 1813/1861] ipv6: A few fixes on dereferencing rt->from It is a followup after the fix in commit 9c69a1320515 ("route: Avoid crash from dereferencing NULL rt->from") rt6_do_redirect(): 1. NULL checking is needed on rt->from because a parallel fib6_info delete could happen that sets rt->from to NULL. (e.g. rt6_remove_exception() and fib6_drop_pcpu_from()). 2. fib6_info_hold() is not enough. Same reason as (1). Meaning, holding dst->__refcnt cannot ensure rt->from is not NULL or rt->from->fib6_ref is not 0. Instead of using fib6_info_hold_safe() which ip6_rt_cache_alloc() is already doing, this patch chooses to extend the rcu section to keep "from" dereference-able after checking for NULL. inet6_rtm_getroute(): 1. NULL checking is also needed on rt->from for a similar reason. Note that inet6_rtm_getroute() is using RTNL_FLAG_DOIT_UNLOCKED. Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Martin KaFai Lau Acked-by: Wei Wang Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2cc166bc978d9..0520aca3354b8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3392,11 +3392,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); from = rcu_dereference(rt->from); - /* This fib6_info_hold() is safe here because we hold reference to rt - * and rt already holds reference to fib6_info. 
- */ - fib6_info_hold(from); - rcu_read_unlock(); + if (!from) + goto out; nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) @@ -3408,10 +3405,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - /* No need to remove rt from the exception table if rt is - * a cached route because rt6_insert_exception() will - * takes care of it - */ + /* rt6_insert_exception() will take care of duplicated exceptions */ if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; @@ -3424,7 +3418,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: - fib6_info_release(from); + rcu_read_unlock(); neigh_release(neigh); } @@ -5023,16 +5017,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rcu_read_lock(); from = rcu_dereference(rt->from); - - if (fibmatch) - err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); - else - err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, - &fl6.saddr, iif, RTM_NEWROUTE, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - 0); + if (from) { + if (fibmatch) + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, + iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + } else { + err = -ENETUNREACH; + } rcu_read_unlock(); if (err < 0) { -- GitLab From 4dd2b82d5adfbe0b1587ccad7a8f76d826120f37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 May 2019 18:56:28 -0700 Subject: [PATCH 1814/1861] udp: fix GRO packet of death syzbot was able to crash host by sending UDP packets with a 0 payload. TCP does not have this issue since we do not aggregate packets without payload. Since dev_gro_receive() sets gso_size based on skb_gro_len(skb) it seems not worth trying to cope with padded packets. 
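The check being added can be condensed into a hypothetical predicate (illustrative only; the patch inlines it): uh->len covers the 8-byte UDP header plus the payload, so a value of sizeof(*uh) or less means an empty payload, and a mismatch with skb_gro_len() means padding. The syzbot report follows.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/udp.h>

/* Reject zero-payload and padded datagrams before GRO aggregation. */
static bool udp_gro_len_sane(const struct udphdr *uh,
			     const struct sk_buff *skb)
{
	unsigned int ulen = ntohs(uh->len);

	return ulen > sizeof(*uh) && ulen == skb_gro_len(skb);
}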
BUG: KASAN: slab-out-of-bounds in skb_gro_receive+0xf5f/0x10e0 net/core/skbuff.c:3826 Read of size 16 at addr ffff88808893fff0 by task syz-executor612/7889 CPU: 0 PID: 7889 Comm: syz-executor612 Not tainted 5.1.0-rc7+ #96 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 __asan_report_load16_noabort+0x14/0x20 mm/kasan/generic_report.c:133 skb_gro_receive+0xf5f/0x10e0 net/core/skbuff.c:3826 udp_gro_receive_segment net/ipv4/udp_offload.c:382 [inline] call_gro_receive include/linux/netdevice.h:2349 [inline] udp_gro_receive+0xb61/0xfd0 net/ipv4/udp_offload.c:414 udp4_gro_receive+0x763/0xeb0 net/ipv4/udp_offload.c:478 inet_gro_receive+0xe72/0x1110 net/ipv4/af_inet.c:1510 dev_gro_receive+0x1cd0/0x23c0 net/core/dev.c:5581 napi_gro_frags+0x36b/0xd10 net/core/dev.c:5843 tun_get_user+0x2f24/0x3fb0 drivers/net/tun.c:1981 tun_chr_write_iter+0xbd/0x156 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:1866 [inline] do_iter_readv_writev+0x5e1/0x8e0 fs/read_write.c:681 do_iter_write fs/read_write.c:957 [inline] do_iter_write+0x184/0x610 fs/read_write.c:938 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1002 do_writev+0x15e/0x370 fs/read_write.c:1037 __do_sys_writev fs/read_write.c:1110 [inline] __se_sys_writev fs/read_write.c:1107 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1107 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x441cc0 Code: 05 48 3d 01 f0 ff ff 0f 83 9d 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 83 3d 51 93 29 00 00 75 14 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 74 09 fc ff c3 48 83 ec 08 e8 ba 2b 00 00 RSP: 002b:00007ffe8c716118 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00007ffe8c716150 RCX: 0000000000441cc0 RDX: 0000000000000001 RSI: 00007ffe8c716170 RDI: 00000000000000f0 RBP: 0000000000000000 R08: 000000000000ffff R09: 0000000000a64668 R10: 0000000020000040 R11: 0000000000000246 R12: 000000000000c2d9 R13: 0000000000402b50 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 5143: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_kmalloc mm/kasan/common.c:497 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:470 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:505 slab_post_alloc_hook mm/slab.h:437 [inline] slab_alloc mm/slab.c:3393 [inline] kmem_cache_alloc+0x11a/0x6f0 mm/slab.c:3555 mm_alloc+0x1d/0xd0 kernel/fork.c:1030 bprm_mm_init fs/exec.c:363 [inline] __do_execve_file.isra.0+0xaa3/0x23f0 fs/exec.c:1791 do_execveat_common fs/exec.c:1865 [inline] do_execve fs/exec.c:1882 [inline] __do_sys_execve fs/exec.c:1958 [inline] __se_sys_execve fs/exec.c:1953 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1953 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 5351: save_stack+0x45/0xd0 mm/kasan/common.c:75 set_track mm/kasan/common.c:87 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:459 kasan_slab_free+0xe/0x10 mm/kasan/common.c:467 __cache_free mm/slab.c:3499 [inline] kmem_cache_free+0x86/0x260 mm/slab.c:3765 __mmdrop+0x238/0x320 kernel/fork.c:677 mmdrop include/linux/sched/mm.h:49 [inline] finish_task_switch+0x47b/0x780 kernel/sched/core.c:2746 context_switch kernel/sched/core.c:2880 [inline] 
__schedule+0x81b/0x1cc0 kernel/sched/core.c:3518 preempt_schedule_irq+0xb5/0x140 kernel/sched/core.c:3745 retint_kernel+0x1b/0x2d arch_local_irq_restore arch/x86/include/asm/paravirt.h:767 [inline] kmem_cache_free+0xab/0x260 mm/slab.c:3766 anon_vma_chain_free mm/rmap.c:134 [inline] unlink_anon_vmas+0x2ba/0x870 mm/rmap.c:401 free_pgtables+0x1af/0x2f0 mm/memory.c:394 exit_mmap+0x2d1/0x530 mm/mmap.c:3144 __mmput kernel/fork.c:1046 [inline] mmput+0x15f/0x4c0 kernel/fork.c:1067 exec_mmap fs/exec.c:1046 [inline] flush_old_exec+0x8d9/0x1c20 fs/exec.c:1279 load_elf_binary+0x9bc/0x53f0 fs/binfmt_elf.c:864 search_binary_handler fs/exec.c:1656 [inline] search_binary_handler+0x17f/0x570 fs/exec.c:1634 exec_binprm fs/exec.c:1698 [inline] __do_execve_file.isra.0+0x1394/0x23f0 fs/exec.c:1818 do_execveat_common fs/exec.c:1865 [inline] do_execve fs/exec.c:1882 [inline] __do_sys_execve fs/exec.c:1958 [inline] __se_sys_execve fs/exec.c:1953 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1953 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88808893f7c0 which belongs to the cache mm_struct of size 1496 The buggy address is located 600 bytes to the right of 1496-byte region [ffff88808893f7c0, ffff88808893fd98) The buggy address belongs to the page: page:ffffea0002224f80 count:1 mapcount:0 mapping:ffff88821bc40ac0 index:0xffff88808893f7c0 compound_mapcount: 0 flags: 0x1fffc0000010200(slab|head) raw: 01fffc0000010200 ffffea00025b4f08 ffffea00027b9d08 ffff88821bc40ac0 raw: ffff88808893f7c0 ffff88808893e440 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88808893fe80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88808893ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88808893ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff888088940000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff888088940080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d8776b2110c10..065334b41d575 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; + unsigned int ulen; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } + /* Do not deal with padded or malicious packets, sorry ! */ + ulen = ntohs(uh->len); + if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } /* pull encapsulating udp header */ skb_gro_pull(skb, sizeof(struct udphdr)); skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); @@ -377,12 +384,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values. + * leading to excessive truesize values. 
* On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (uh->len > uh2->len || skb_gro_receive(p, skb) || - uh->len != uh2->len || + if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) || + ulen != ntohs(uh2->len) || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; -- GitLab From 12f363511d47f86c49b7766c349989cb33fd61a8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 30 Apr 2019 16:11:59 +0000 Subject: [PATCH 1815/1861] powerpc/32s: Fix BATs setting with CONFIG_STRICT_KERNEL_RWX Serge reported some crashes with CONFIG_STRICT_KERNEL_RWX enabled on a book3s32 machine. Analysis shows two issues: - BATs addresses and sizes are not properly aligned. - There is a gap between the last address covered by BATs and the first address covered by pages. Memory mapped with DBATs: 0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent 1: 0xc0800000-0xc0bfffff 0x00800000 Kernel RO coherent 2: 0xc0c00000-0xc13fffff 0x00c00000 Kernel RW coherent 3: 0xc1400000-0xc23fffff 0x01400000 Kernel RW coherent 4: 0xc2400000-0xc43fffff 0x02400000 Kernel RW coherent 5: 0xc4400000-0xc83fffff 0x04400000 Kernel RW coherent 6: 0xc8400000-0xd03fffff 0x08400000 Kernel RW coherent 7: 0xd0400000-0xe03fffff 0x10400000 Kernel RW coherent Memory mapped with pages: 0xe1000000-0xefffffff 0x21000000 240M rw present dirty accessed This patch fixes both issues. With the patch, we get the following which is as expected: Memory mapped with DBATs: 0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent 1: 0xc0800000-0xc0bfffff 0x00800000 Kernel RO coherent 2: 0xc0c00000-0xc0ffffff 0x00c00000 Kernel RW coherent 3: 0xc1000000-0xc1ffffff 0x01000000 Kernel RW coherent 4: 0xc2000000-0xc3ffffff 0x02000000 Kernel RW coherent 5: 0xc4000000-0xc7ffffff 0x04000000 Kernel RW coherent 6: 0xc8000000-0xcfffffff 0x08000000 Kernel RW coherent 7: 0xd0000000-0xdfffffff 0x10000000 Kernel RW coherent Memory mapped with pages: 0xe0000000-0xefffffff 0x20000000 256M rw present dirty accessed Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX") Reported-by: Serge Belyshev Acked-by: Segher Boessenkool Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/ppc_mmu_32.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f29d2f118b444..5d9c3ff728c9a 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -98,10 +98,20 @@ static int find_free_bat(void) return -1; } +/* + * This function calculates the size of the larger block usable to map the + * beginning of an area based on the start address and size of that area: + * - max block size is 8M on 601 and 256 on other 6xx. + * - base address must be aligned to the block size. So the maximum block size + * is identified by the lowest bit set to 1 in the base address (for instance + * if base is 0x16000000, max size is 0x02000000). + * - block size has to be a power of two. This is calculated by finding the + * highest bit set to 1. + */ static unsigned int block_size(unsigned long base, unsigned long top) { unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 
8 : 256) << 20; - unsigned int base_shift = (fls(base) - 1) & 31; + unsigned int base_shift = (ffs(base) - 1) & 31; unsigned int block_shift = (fls(top - base) - 1) & 31; return min3(max_size, 1U << base_shift, 1U << block_shift); @@ -157,7 +167,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { - int done; + unsigned long done; unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; if (__map_without_bats) { @@ -169,10 +179,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return __mmu_mapin_ram(base, top); done = __mmu_mapin_ram(base, border); - if (done != border - base) + if (done != border) return done; - return done + __mmu_mapin_ram(border, top); + return __mmu_mapin_ram(border, top); } void mmu_mark_initmem_nx(void) -- GitLab From 4e9036042fedaffcd868d7f7aa948756c48c637d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 1 May 2019 22:46:11 -0400 Subject: [PATCH 1816/1861] ufs: fix braino in ufs_get_inode_gid() for solaris UFS flavour To choose whether to pick the GID from the old (16bit) or new (32bit) field, we should check if the old gid field is set to 0xffff. Mainline checks the old *UID* field instead - cut'n'paste from the corresponding code in ufs_get_inode_uid(). Fixes: 252e211e90ce Signed-off-by: Al Viro --- fs/ufs/util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 1fd3011ea6236..7fd4802222b8c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -229,7 +229,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode) case UFS_UID_44BSD: return fs32_to_cpu(sb, inode->ui_u3.ui_44.ui_gid); case UFS_UID_EFT: - if (inode->ui_u1.oldids.ui_suid == 0xFFFF) + if (inode->ui_u1.oldids.ui_sgid == 0xFFFF) return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid); /* Fall through */ default: -- GitLab From 26a374ae7af8d7003ad28a962fba0141e68af5da Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 17 Jan 2019 10:02:22 +0100 Subject: [PATCH 1817/1861] s390: add missing ENDPROC statements to assembler functions The assembler code in arch/s390 misses proper ENDPROC statements to properly end functions in a few places. Add them. 
Signed-off-by: Martin Schwidefsky --- arch/s390/boot/text_dma.S | 6 ++++++ arch/s390/crypto/crc32be-vx.S | 1 + arch/s390/crypto/crc32le-vx.S | 6 ++++-- arch/s390/kernel/base.S | 3 +++ arch/s390/kernel/entry.S | 24 ++++++++++++++++++++++-- arch/s390/kernel/mcount.S | 8 ++++++-- arch/s390/kernel/reipl.S | 1 + arch/s390/kernel/relocate_kernel.S | 1 + arch/s390/kernel/swsusp.S | 2 ++ arch/s390/lib/mem.S | 1 + 10 files changed, 47 insertions(+), 6 deletions(-) diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index 2414360ff22da..ea93314f44976 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -41,6 +41,7 @@ ENTRY(_diag14_dma) lgfr %r2,%r5 BR_EX_DMA_r14 EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault) +ENDPROC(_diag14_dma) /* * int _diag210_dma(struct diag210 *addr) @@ -58,6 +59,7 @@ ENTRY(_diag210_dma) lgfr %r2,%r2 BR_EX_DMA_r14 EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault) +ENDPROC(_diag210_dma) /* * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode) @@ -71,6 +73,7 @@ ENTRY(_diag26c_dma) lgfr %r2,%r5 BR_EX_DMA_r14 EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex) +ENDPROC(_diag26c_dma) /* * void _diag0c_dma(struct hypfs_diag0c_entry *entry) @@ -80,6 +83,7 @@ ENTRY(_diag0c_dma) diag %r2,%r2,0x0c sam64 BR_EX_DMA_r14 +ENDPROC(_diag0c_dma) /* * void _swsusp_reset_dma(void) @@ -96,6 +100,7 @@ restart_entry: sigp %r1,%r0,SIGP_SET_ARCHITECTURE sam64 BR_EX_DMA_r14 +ENDPROC(_swsusp_reset_dma) /* * void _diag308_reset_dma(void) @@ -142,6 +147,7 @@ restart_part2: lpswe 0(%r4) .Lcontinue: BR_EX_DMA_r14 +ENDPROC(_diag308_reset_dma) .section .dma.data,"aw",@progbits .align 8 diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index 2bf01ba44107c..0099044e2c860 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -207,5 +207,6 @@ ENTRY(crc32_be_vgfm_16) .Ldone: VLGVF %r2,%v2,3 BR_EX %r14 +ENDPROC(crc32_be_vgfm_16) .previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S index 7d6f568bd3ad1..71caf0f4ec08b 100644 --- a/arch/s390/crypto/crc32le-vx.S +++ b/arch/s390/crypto/crc32le-vx.S @@ -105,13 +105,14 @@ ENTRY(crc32_le_vgfm_16) larl %r5,.Lconstants_CRC_32_LE j crc32_le_vgfm_generic +ENDPROC(crc32_le_vgfm_16) ENTRY(crc32c_le_vgfm_16) larl %r5,.Lconstants_CRC_32C_LE j crc32_le_vgfm_generic +ENDPROC(crc32c_le_vgfm_16) - -crc32_le_vgfm_generic: +ENTRY(crc32_le_vgfm_generic) /* Load CRC-32 constants */ VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5 @@ -267,5 +268,6 @@ crc32_le_vgfm_generic: .Ldone: VLGVF %r2,%v2,2 BR_EX %r14 +ENDPROC(crc32_le_vgfm_generic) .previous diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index d6ee5978e2731..2f39ea57f3589 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -28,6 +28,7 @@ ENTRY(s390_base_mcck_handler) 1: la %r1,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) lpswe __LC_MCK_OLD_PSW +ENDPROC(s390_base_mcck_handler) .section .bss .align 8 @@ -48,6 +49,7 @@ ENTRY(s390_base_ext_handler) 1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW +ENDPROC(s390_base_ext_handler) .section .bss .align 8 @@ -68,6 +70,7 @@ ENTRY(s390_base_pgm_handler) lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) +ENDPROC(s390_base_pgm_handler) .align 8 disabled_wait_psw: diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index baa67e434b468..3f4d272577d34 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -224,6 +224,7 @@ 
ENTRY(__bpon) .globl __bpon BPON BR_EX %r14 +ENDPROC(__bpon) /* * Scheduler resume function, called by switch_to @@ -248,6 +249,7 @@ ENTRY(__switch_to) lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 BR_EX %r14 +ENDPROC(__switch_to) .L__critical_start: @@ -324,6 +326,7 @@ sie_exit: EX_TABLE(.Lrewind_pad4,.Lsie_fault) EX_TABLE(.Lrewind_pad2,.Lsie_fault) EX_TABLE(sie_exit,.Lsie_fault) +ENDPROC(sie64a) EXPORT_SYMBOL(sie64a) EXPORT_SYMBOL(sie_exit) #endif @@ -570,6 +573,7 @@ ENTRY(system_call) lgr %r2,%r11 # pass pointer to pt_regs larl %r14,.Lsysc_return jg do_syscall_trace_exit +ENDPROC(system_call) # # a new process exits the kernel with ret_from_fork @@ -584,10 +588,16 @@ ENTRY(ret_from_fork) jne .Lsysc_tracenogo # it's a kernel thread lmg %r9,%r10,__PT_R9(%r11) # load gprs + la %r2,0(%r10) + BASR_EX %r14,%r9 + j .Lsysc_tracenogo +ENDPROC(ret_from_fork) + ENTRY(kernel_thread_starter) la %r2,0(%r10) BASR_EX %r14,%r9 j .Lsysc_tracenogo +ENDPROC(kernel_thread_starter) /* * Program check handler routine @@ -698,6 +708,7 @@ ENTRY(pgm_check_handler) stg %r14,__LC_RETURN_PSW+8 lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs +ENDPROC(pgm_check_handler) /* * IO interrupt handler routine @@ -926,6 +937,7 @@ ENTRY(io_int_handler) ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j .Lio_return +ENDPROC(io_int_handler) /* * External interrupt handler routine @@ -965,6 +977,7 @@ ENTRY(ext_int_handler) lghi %r3,EXT_INTERRUPT brasl %r14,do_IRQ j .Lio_return +ENDPROC(ext_int_handler) /* * Load idle PSW. The second "half" of this function is in .Lcleanup_idle. @@ -989,6 +1002,7 @@ ENTRY(psw_idle) lpswe __SF_EMPTY(%r15) BR_EX %r14 .Lpsw_idle_end: +ENDPROC(psw_idle) /* * Store floating-point controls and floating-point or vector register @@ -1031,6 +1045,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_exit: BR_EX %r14 .Lsave_fpu_regs_end: +ENDPROC(save_fpu_regs) EXPORT_SYMBOL(save_fpu_regs) /* @@ -1077,6 +1092,7 @@ load_fpu_regs: .Lload_fpu_regs_exit: BR_EX %r14 .Lload_fpu_regs_end: +ENDPROC(load_fpu_regs) .L__critical_end: @@ -1206,6 +1222,7 @@ ENTRY(mcck_int_handler) lg %r15,__LC_NODAT_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) j .Lmcck_skip +ENDPROC(mcck_int_handler) # # PSW restart interrupt handler @@ -1232,6 +1249,7 @@ ENTRY(restart_int_handler) 2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu brc 2,2b 3: j 3b +ENDPROC(restart_int_handler) .section .kprobes.text, "ax" @@ -1241,7 +1259,7 @@ ENTRY(restart_int_handler) * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. 
*/ -stack_overflow: +ENTRY(stack_overflow) lg %r15,__LC_NODAT_STACK # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -1251,9 +1269,10 @@ stack_overflow: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow +ENDPROC(stack_overflow) #endif -cleanup_critical: +ENTRY(cleanup_critical) #if IS_ENABLED(CONFIG_KVM) clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap jl 0f @@ -1289,6 +1308,7 @@ cleanup_critical: clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs 0: BR_EX %r14,%r11 +ENDPROC(cleanup_critical) .align 8 .Lcleanup_table: diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index e93fbf02490cf..09ae6da0aaa5f 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -20,6 +20,7 @@ ENTRY(ftrace_stub) BR_EX %r14 +ENDPROC(ftrace_stub) #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -28,7 +29,7 @@ ENTRY(ftrace_stub) ENTRY(_mcount) BR_EX %r14 - +ENDPROC(_mcount) EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) @@ -61,7 +62,8 @@ ENTRY(ftrace_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. -ENTRY(ftrace_graph_caller) + .globl ftrace_graph_caller +ftrace_graph_caller: j ftrace_graph_caller_end lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) lg %r3,(STACK_PTREGS_PSW+8)(%r15) @@ -73,6 +75,7 @@ ftrace_graph_caller_end: lg %r1,(STACK_PTREGS_PSW+8)(%r15) lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) BR_EX %r1 +ENDPROC(ftrace_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -86,5 +89,6 @@ ENTRY(return_to_handler) lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 +ENDPROC(return_to_handler) #endif diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 7f14adf512c6d..4a22163962eb3 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -73,6 +73,7 @@ ENTRY(store_status) lgr %r9,%r2 lgr %r2,%r3 BR_EX %r9 +ENDPROC(store_status) .section .bss .align 8 diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index 1b56f087ce2ce..fe396673e8a66 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -66,6 +66,7 @@ ENTRY(relocate_kernel) mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 .diag: diag %r0,%r0,0x308 +ENDPROC(relocate_kernel) .align 8 load_psw: diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index f5219ce11cc5b..19a3c427801a8 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -108,6 +108,7 @@ ENTRY(swsusp_arch_suspend) lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 BR_EX %r14 +ENDPROC(swsusp_arch_suspend) /* * Restore saved memory image to correct place and restore register context. 
@@ -260,6 +261,7 @@ restore_registers: lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 BR_EX %r14 +ENDPROC(swsusp_arch_resume) .section .data..nosave,"aw",@progbits .align 8 diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 53008da051907..dc0874f2e203c 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -178,6 +178,7 @@ ENTRY(__memset\bits) BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) +ENDPROC(__memset\bits) .endm __MEMSET 16,2,sth -- GitLab From 40a3abf751dd0ff9ef758fd5c14530f557cd432d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 1 Feb 2019 16:33:37 +0100 Subject: [PATCH 1818/1861] s390/nospec: rename assembler generated expoline thunks The assembler version of the expoline thunk use the naming __s390x_indirect_jump_rxuse_ry while the compiler generates names like __s390_indirect_jump_rx_use_ry. Make the naming more consistent. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/nospec-insn.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 123dac3717b33..0033dcd663b1b 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -32,23 +32,23 @@ _LC_BR_R1 = __LC_BR_R1 .endm .macro __THUNK_PROLOG_BR r1,r2 - __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 + __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm .macro __THUNK_PROLOG_BC d0,r1,r2 - __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1 .endm .macro __THUNK_BR r1,r2 - jg __s390x_indirect_jump_r\r2\()use_r\r1 + jg __s390_indirect_jump_r\r2\()use_r\r1 .endm .macro __THUNK_BC d0,r1,r2 - jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + jg __s390_indirect_branch_\d0\()_\r2\()use_\r1 .endm .macro __THUNK_BRASL r1,r2,r3 - brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2 + brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 .endm .macro __DECODE_RR expand,reg,ruse -- GitLab From bf72630130c29e2ba42e3db8d502de6bdfaa7d9a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 12 Apr 2019 08:57:34 +0200 Subject: [PATCH 1819/1861] s390: use proper expoline sections for .dma code The text_dma.S code uses its own macro to generate an inline version of an expoline. To make it easier to identify all expolines in the kernel use a thunk and a branch to the thunk just like the rest of the kernel code does it. The name of the text_dma.S expoline thunk is __dma__s390_indirect_jump_r14 and the section is named .dma.text.__s390_indirect_jump_r14. This will be needed for the objtool support. Signed-off-by: Martin Schwidefsky --- arch/s390/boot/text_dma.S | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index ea93314f44976..9715715c4c28d 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -9,6 +9,16 @@ #include #include +#ifdef CC_USING_EXPOLINE + .pushsection .dma.text.__s390_indirect_jump_r14,"axG" +__dma__s390_indirect_jump_r14: + larl %r1,0f + ex 0,0(%r1) + j . +0: br %r14 + .popsection +#endif + .section .dma.text,"ax" /* * Simplified version of expoline thunk. The normal thunks can not be used here, @@ -17,10 +27,11 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_DMA_r14 - larl %r1,0f - ex 0,0(%r1) - j . 
-0: br %r14 +#ifdef CC_USING_EXPOLINE + jg __dma__s390_indirect_jump_r14 +#else + br %r14 +#endif .endm /* -- GitLab From e21f8baf8d9a452a09a9cf7c8ab4720ff9a891ef Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 17 Jan 2019 09:29:15 +0100 Subject: [PATCH 1820/1861] s390/bug: add entry size to the __bug_table section Change the __EMIT_BUG inline assembly to emit mergeable __bug_table entries with type @progbits and specify the size of each entry. The entry size is encoded sh_entsize field of the section definition, it allows to identify which struct bug_entry to use to decode the entries. This will be needed for the objtool support. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/bug.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 429f43a8a8e81..713fc9735ffb0 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -15,7 +15,7 @@ ".section .rodata.str,\"aMS\",@progbits,1\n" \ "2: .asciz \""__FILE__"\"\n" \ ".previous\n" \ - ".section __bug_table,\"aw\"\n" \ + ".section __bug_table,\"awM\",@progbits,%2\n" \ "3: .long 1b-3b,2b-3b\n" \ " .short %0,%1\n" \ " .org 3b+%2\n" \ @@ -27,17 +27,17 @@ #else /* CONFIG_DEBUG_BUGVERBOSE */ -#define __EMIT_BUG(x) do { \ - asm volatile( \ - "0: j 0b+2\n" \ - "1:\n" \ - ".section __bug_table,\"aw\"\n" \ - "2: .long 1b-2b\n" \ - " .short %0\n" \ - " .org 2b+%1\n" \ - ".previous\n" \ - : : "i" (x), \ - "i" (sizeof(struct bug_entry))); \ +#define __EMIT_BUG(x) do { \ + asm volatile( \ + "0: j 0b+2\n" \ + "1:\n" \ + ".section __bug_table,\"awM\",@progbits,%1\n" \ + "2: .long 1b-2b\n" \ + " .short %0\n" \ + " .org 2b+%1\n" \ + ".previous\n" \ + : : "i" (x), \ + "i" (sizeof(struct bug_entry))); \ } while (0) #endif /* CONFIG_DEBUG_BUGVERBOSE */ -- GitLab From 1c705ad5efae9c712e763a47fbcc95b87b7347d2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 3 Jan 2019 16:49:35 +0100 Subject: [PATCH 1821/1861] s390/opcodes: add missing instructions to the disassembler Signed-off-by: Martin Schwidefsky --- arch/s390/tools/opcodes.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt index 1cbed82cd17b7..64638b764d1cc 100644 --- a/arch/s390/tools/opcodes.txt +++ b/arch/s390/tools/opcodes.txt @@ -1,3 +1,5 @@ +0000 illegal E +0002 brkpt E 0101 pr E 0102 upt E 0104 ptff E @@ -257,6 +259,7 @@ b258 bsg RRE_RR b25a bsa RRE_RR b25d clst RRE_RR b25e srst RRE_RR +b25f chsc RRE_R0 b263 cmpsc RRE_RR b274 siga S_RD b276 xsch S_00 @@ -277,6 +280,9 @@ b29d lfpc S_RD b2a5 tre RRE_RR b2a6 cu21 RRF_U0RR b2a7 cu12 RRF_U0RR +b2ad nqap RRE_RR +b2ae dqap RRE_RR +b2af pqap RRE_RR b2b0 stfle S_RD b2b1 stfl S_RD b2b2 lpswe S_RD @@ -290,6 +296,7 @@ b2e5 epctr RRE_RR b2e8 ppa RRF_U0RR b2ec etnd RRE_R0 b2ed ecpga RRE_RR +b2f0 iucv RRE_RR b2f8 tend S_00 b2fa niai IE_UU b2fc tabort S_RD @@ -559,12 +566,15 @@ b998 alcr RRE_RR b999 slbr RRE_RR b99a epair RRE_R0 b99b esair RRE_R0 +b99c eqbs RRF_U0RR b99d esea RRE_R0 b99e pti RRE_RR b99f ssair RRE_R0 +b9a0 clp RRF_U0RR b9a1 tpei RRE_RR b9a2 ptf RRE_R0 b9aa lptea RRF_RURR2 +b9ab essa RRF_U0RR b9ac irbm RRE_RR b9ae rrbm RRE_RR b9af pfmf RRE_RR @@ -1039,6 +1049,7 @@ eb7a agsi SIY_IRD eb7e algsi SIY_IRD eb80 icmh RSY_RURD eb81 icmy RSY_RURD +eb8a sqbs RSY_RDRU eb8e mvclu RSY_RRRD eb8f clclu RSY_RRRD eb90 stmy RSY_RRRD -- GitLab From 78c98f9074135d3dab4e39544e0a537f92388fce Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: 
Mon, 28 Jan 2019 08:33:08 +0100 Subject: [PATCH 1822/1861] s390/unwind: introduce stack unwind API Rework the dump_trace() stack unwinder interface to support different unwinding algorithms. The new interface looks like this: struct unwind_state state; unwind_for_each_frame(&state, task, regs, start_stack) do_something(state.sp, state.ip, state.reliable); The unwind_bc.c file contains the implementation for the classic back-chain unwinder. One positive side effect of the new code is it now handles ftraced functions gracefully. It prints the real name of the return function instead of 'return_to_handler'. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 72 ------------- arch/s390/include/asm/stacktrace.h | 114 ++++++++++++++++++++ arch/s390/include/asm/unwind.h | 101 +++++++++++++++++ arch/s390/kernel/Makefile | 3 +- arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/dumpstack.c | 167 +++++++++++++++++------------ arch/s390/kernel/irq.c | 1 + arch/s390/kernel/machine_kexec.c | 1 + arch/s390/kernel/perf_event.c | 16 +-- arch/s390/kernel/process.c | 1 + arch/s390/kernel/setup.c | 1 + arch/s390/kernel/smp.c | 1 + arch/s390/kernel/stacktrace.c | 69 ++++++------ arch/s390/kernel/unwind_bc.c | 155 ++++++++++++++++++++++++++ arch/s390/mm/maccess.c | 1 + arch/s390/oprofile/init.c | 22 ++-- 16 files changed, 521 insertions(+), 205 deletions(-) create mode 100644 arch/s390/include/asm/stacktrace.h create mode 100644 arch/s390/include/asm/unwind.h create mode 100644 arch/s390/kernel/unwind_bc.c diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8aa85e39f50b0..9f2ff4a54aff1 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -156,25 +156,6 @@ struct thread_struct { typedef struct thread_struct thread_struct; -/* - * Stack layout of a C stack frame. - */ -#ifndef __PACK_STACK -struct stack_frame { - unsigned long back_chain; - unsigned long empty1[5]; - unsigned long gprs[10]; - unsigned int empty2[8]; -}; -#else -struct stack_frame { - unsigned long empty1[5]; - unsigned int empty2[8]; - unsigned long gprs[10]; - unsigned long back_chain; -}; -#endif - #define ARCH_MIN_TASKALIGN 8 #define INIT_THREAD { \ @@ -206,11 +187,7 @@ struct mm_struct; struct seq_file; struct pt_regs; -typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable); -void dump_trace(dump_trace_func_t func, void *data, - struct task_struct *task, unsigned long sp); void show_registers(struct pt_regs *regs); - void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. 
*/ @@ -244,55 +221,6 @@ static __no_kasan_or_inline unsigned short stap(void) return cpu_address; } -#define CALL_ARGS_0() \ - register unsigned long r2 asm("2") -#define CALL_ARGS_1(arg1) \ - register unsigned long r2 asm("2") = (unsigned long)(arg1) -#define CALL_ARGS_2(arg1, arg2) \ - CALL_ARGS_1(arg1); \ - register unsigned long r3 asm("3") = (unsigned long)(arg2) -#define CALL_ARGS_3(arg1, arg2, arg3) \ - CALL_ARGS_2(arg1, arg2); \ - register unsigned long r4 asm("4") = (unsigned long)(arg3) -#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \ - CALL_ARGS_3(arg1, arg2, arg3); \ - register unsigned long r4 asm("5") = (unsigned long)(arg4) -#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \ - CALL_ARGS_4(arg1, arg2, arg3, arg4); \ - register unsigned long r4 asm("6") = (unsigned long)(arg5) - -#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), - -#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" -#define CALL_CLOBBER_4 CALL_CLOBBER_5 -#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5" -#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4" -#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3" -#define CALL_CLOBBER_0 CALL_CLOBBER_1 - -#define CALL_ON_STACK(fn, stack, nr, args...) \ -({ \ - CALL_ARGS_##nr(args); \ - unsigned long prev; \ - \ - asm volatile( \ - " la %[_prev],0(15)\n" \ - " la 15,0(%[_stack])\n" \ - " stg %[_prev],%[_bc](15)\n" \ - " brasl 14,%[_fn]\n" \ - " la 15,0(%[_prev])\n" \ - : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "a" (stack), \ - [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ - [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ - r2; \ -}) - /* * Give up the time slice of the virtual PU. */ diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h new file mode 100644 index 0000000000000..49634bfbecdd0 --- /dev/null +++ b/arch/s390/include/asm/stacktrace.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_STACKTRACE_H +#define _ASM_S390_STACKTRACE_H + +#include +#include +#include + +enum stack_type { + STACK_TYPE_UNKNOWN, + STACK_TYPE_TASK, + STACK_TYPE_IRQ, + STACK_TYPE_NODAT, + STACK_TYPE_RESTART, +}; + +struct stack_info { + enum stack_type type; + unsigned long begin, end; +}; + +const char *stack_type_name(enum stack_type type); +int get_stack_info(unsigned long sp, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask); + +static inline bool on_stack(struct stack_info *info, + unsigned long addr, size_t len) +{ + if (info->type == STACK_TYPE_UNKNOWN) + return false; + if (addr + len < addr) + return false; + return addr >= info->begin && addr + len < info->end; +} + +static inline unsigned long get_stack_pointer(struct task_struct *task, + struct pt_regs *regs) +{ + if (regs) + return (unsigned long) kernel_stack_pointer(regs); + if (task == current) + return current_stack_pointer(); + return (unsigned long) task->thread.ksp; +} + +/* + * Stack layout of a C stack frame. 
+ */ +#ifndef __PACK_STACK +struct stack_frame { + unsigned long back_chain; + unsigned long empty1[5]; + unsigned long gprs[10]; + unsigned int empty2[8]; +}; +#else +struct stack_frame { + unsigned long empty1[5]; + unsigned int empty2[8]; + unsigned long gprs[10]; + unsigned long back_chain; +}; +#endif + +#define CALL_ARGS_0() \ + register unsigned long r2 asm("2") +#define CALL_ARGS_1(arg1) \ + register unsigned long r2 asm("2") = (unsigned long)(arg1) +#define CALL_ARGS_2(arg1, arg2) \ + CALL_ARGS_1(arg1); \ + register unsigned long r3 asm("3") = (unsigned long)(arg2) +#define CALL_ARGS_3(arg1, arg2, arg3) \ + CALL_ARGS_2(arg1, arg2); \ + register unsigned long r4 asm("4") = (unsigned long)(arg3) +#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \ + CALL_ARGS_3(arg1, arg2, arg3); \ + register unsigned long r4 asm("5") = (unsigned long)(arg4) +#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \ + CALL_ARGS_4(arg1, arg2, arg3, arg4); \ + register unsigned long r4 asm("6") = (unsigned long)(arg5) + +#define CALL_FMT_0 "=&d" (r2) : +#define CALL_FMT_1 "+&d" (r2) : +#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +#define CALL_FMT_5 CALL_FMT_4 "d" (r6), + +#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" +#define CALL_CLOBBER_4 CALL_CLOBBER_5 +#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5" +#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4" +#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3" +#define CALL_CLOBBER_0 CALL_CLOBBER_1 + +#define CALL_ON_STACK(fn, stack, nr, args...) \ +({ \ + CALL_ARGS_##nr(args); \ + unsigned long prev; \ + \ + asm volatile( \ + " la %[_prev],0(15)\n" \ + " la 15,0(%[_stack])\n" \ + " stg %[_prev],%[_bc](15)\n" \ + " brasl 14,%[_fn]\n" \ + " la 15,0(%[_prev])\n" \ + : [_prev] "=&a" (prev), CALL_FMT_##nr \ + [_stack] "a" (stack), \ + [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ + r2; \ +}) + +#endif /* _ASM_S390_STACKTRACE_H */ diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h new file mode 100644 index 0000000000000..6eb2ef105d879 --- /dev/null +++ b/arch/s390/include/asm/unwind.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_UNWIND_H +#define _ASM_S390_UNWIND_H + +#include +#include +#include +#include + +/* + * To use the stack unwinder it has to be initialized with unwind_start. + * There four combinations for task and regs: + * 1) task==NULL, regs==NULL: the unwind starts for the task that is currently + * running, sp/ip picked up from the CPU registers + * 2) task==NULL, regs!=NULL: the unwind starts from the sp/ip found in + * the struct pt_regs of an interrupt frame for the current task + * 3) task!=NULL, regs==NULL: the unwind starts for an inactive task with + * the sp picked up from task->thread.ksp and the ip picked up from the + * return address stored by __switch_to + * 4) task!=NULL, regs!=NULL: the sp/ip are picked up from the interrupt + * frame 'regs' of a inactive task + * If 'first_frame' is not zero unwind_start skips unwind frames until it + * reaches the specified stack pointer. + * The end of the unwinding is indicated with unwind_done, this can be true + * right after unwind_start, e.g. with first_frame!=0 that can not be found. + * unwind_next_frame skips to the next frame. + * Once the unwind is completed unwind_error() can be used to check if there + * has been a situation where the unwinder could not correctly understand + * the tasks call chain. 
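+ *
+ * A minimal usage sketch (editorial; it mirrors the example in the
+ * commit message, with do_something() as a placeholder):
+ *
+ *	struct unwind_state state;
+ *
+ *	unwind_for_each_frame(&state, task, regs, first_frame)
+ *		do_something(state.sp, state.ip, state.reliable);
+ *	if (unwind_error(&state))
+ *		pr_warn("stack unwind did not complete cleanly\n");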
+ */ + +struct unwind_state { + struct stack_info stack_info; + unsigned long stack_mask; + struct task_struct *task; + struct pt_regs *regs; + unsigned long sp, ip; + int graph_idx; + bool reliable; + bool error; +}; + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long first_frame); +bool unwind_next_frame(struct unwind_state *state); +unsigned long unwind_get_return_address(struct unwind_state *state); + +static inline bool unwind_done(struct unwind_state *state) +{ + return state->stack_info.type == STACK_TYPE_UNKNOWN; +} + +static inline bool unwind_error(struct unwind_state *state) +{ + return state->error; +} + +static inline void unwind_start(struct unwind_state *state, + struct task_struct *task, + struct pt_regs *regs, + unsigned long sp) +{ + sp = sp ? : get_stack_pointer(task, regs); + __unwind_start(state, task, regs, sp); +} + +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +{ + return unwind_done(state) ? NULL : state->regs; +} + +#define unwind_for_each_frame(state, task, regs, first_frame) \ + for (unwind_start(state, task, regs, first_frame); \ + !unwind_done(state); \ + unwind_next_frame(state)) + +static inline void unwind_init(void) {} +static inline void unwind_module_init(struct module *mod, void *orc_ip, + size_t orc_ip_size, void *orc, + size_t orc_size) {} + +#ifdef CONFIG_KASAN +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + if (task == current) \ + val = READ_ONCE(x); \ + else \ + val = READ_ONCE_NOCHECK(x); \ + val; \ +}) +#else +#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x) +#endif + +#endif /* _ASM_S390_UNWIND_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 19425605a83dc..b0478d01a0c55 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -39,6 +39,7 @@ CFLAGS_smp.o := -Wno-nonnull # CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls +CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls # # Pass UTS_MACHINE for user_regset definition @@ -51,7 +52,7 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o -obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o +obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 164bec175628a..41ac4ad213110 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -16,6 +16,7 @@ #include #include #include +#include int main(void) { diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index cb7f55bbe06e8..9e87b68be21c7 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -21,95 +21,124 @@ #include #include #include +#include -/* - * For dump_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start 
at any of the three stacks and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long __no_sanitize_address -__dump_trace(dump_trace_func_t func, void *data, unsigned long sp, - unsigned long low, unsigned long high) +const char *stack_type_name(enum stack_type type) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - if (func(data, sf->gprs[8], 0)) - return sp; - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - if (func(data, sf->gprs[8], 1)) - return sp; - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - if (!user_mode(regs)) { - if (func(data, regs->psw.addr, 1)) - return sp; - } - low = sp; - sp = regs->gprs[15]; + switch (type) { + case STACK_TYPE_TASK: + return "task"; + case STACK_TYPE_IRQ: + return "irq"; + case STACK_TYPE_NODAT: + return "nodat"; + case STACK_TYPE_RESTART: + return "restart"; + default: + return "unknown"; } } -void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, - unsigned long sp) +static inline bool in_stack(unsigned long sp, struct stack_info *info, + enum stack_type type, unsigned long low, + unsigned long high) +{ + if (sp < low || sp >= high) + return false; + info->type = type; + info->begin = low; + info->end = high; + return true; +} + +static bool in_task_stack(unsigned long sp, struct task_struct *task, + struct stack_info *info) +{ + unsigned long stack; + + stack = (unsigned long) task_stack_page(task); + return in_stack(sp, info, STACK_TYPE_TASK, stack, stack + THREAD_SIZE); +} + +static bool in_irq_stack(unsigned long sp, struct stack_info *info) { - unsigned long frame_size; + unsigned long frame_size, top; frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); -#ifdef CONFIG_CHECK_STACK - sp = __dump_trace(func, data, sp, - S390_lowcore.nodat_stack + frame_size - THREAD_SIZE, - S390_lowcore.nodat_stack + frame_size); -#endif - sp = __dump_trace(func, data, sp, - S390_lowcore.async_stack + frame_size - THREAD_SIZE, - S390_lowcore.async_stack + frame_size); - task = task ?: current; - __dump_trace(func, data, sp, - (unsigned long)task_stack_page(task), - (unsigned long)task_stack_page(task) + THREAD_SIZE); + top = S390_lowcore.async_stack + frame_size; + return in_stack(sp, info, STACK_TYPE_IRQ, top - THREAD_SIZE, top); +} + +static bool in_nodat_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long frame_size, top; + + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + top = S390_lowcore.nodat_stack + frame_size; + return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top); } -EXPORT_SYMBOL_GPL(dump_trace); -static int show_address(void *data, unsigned long address, int reliable) +static bool in_restart_stack(unsigned long sp, struct stack_info *info) { - if (reliable) - printk(" [<%016lx>] %pSR \n", address, (void *)address); - else - printk("([<%016lx>] %pSR)\n", address, (void *)address); + unsigned long frame_size, top; + + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + top = S390_lowcore.restart_stack + frame_size; + return in_stack(sp, info, STACK_TYPE_RESTART, top - THREAD_SIZE, top); +} + +int get_stack_info(unsigned 
long sp, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask) +{ + if (!sp) + goto unknown; + + task = task ? : current; + + /* Check per-task stack */ + if (in_task_stack(sp, task, info)) + goto recursion_check; + + if (task != current) + goto unknown; + + /* Check per-cpu stacks */ + if (!in_irq_stack(sp, info) && + !in_nodat_stack(sp, info) && + !in_restart_stack(sp, info)) + goto unknown; + +recursion_check: + /* + * Make sure we don't iterate through any given stack more than once. + * If it comes up a second time then there's something wrong going on: + * just break out and report an unknown stack type. + */ + if (*visit_mask & (1UL << info->type)) { + printk_deferred_once(KERN_WARNING + "WARNING: stack recursion on stack type %d\n", + info->type); + goto unknown; + } + *visit_mask |= 1UL << info->type; return 0; +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; } void show_stack(struct task_struct *task, unsigned long *stack) { - unsigned long sp = (unsigned long) stack; + struct unwind_state state; - if (!sp) - sp = task ? task->thread.ksp : current_stack_pointer(); printk("Call Trace:\n"); - dump_trace(show_address, NULL, task, sp); if (!task) task = current; - debug_show_held_locks(task); + unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) + printk(state.reliable ? " [<%016lx>] %pSR \n" : + "([<%016lx>] %pSR)\n", + state.ip, (void *) state.ip); + debug_show_held_locks(task ? : current); } static void show_last_breaking_event(struct pt_regs *regs) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 150964f911833..8371855042dc2 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index e2ba7b7f574ef..2f3a742a71a5d 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0d770e513abf4..fcb6c2e92b071 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -21,6 +21,7 @@ #include #include #include +#include const char *perf_pmu_name(void) { @@ -219,20 +220,13 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -static int __perf_callchain_kernel(void *data, unsigned long address, int reliable) -{ - struct perf_callchain_entry_ctx *entry = data; - - perf_callchain_store(entry, address); - return 0; -} - void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (user_mode(regs)) - return; - dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]); + struct unwind_state state; + + unwind_for_each_frame(&state, current, regs, 0) + perf_callchain_store(entry, state.ip); } /* Perf definitions for PMU event attributes in sysfs */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 6e758bb6cd29b..63873aa6693fe 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "entry.h" diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 64e4bc9dd1301..f8544d5174307 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -66,6 +66,7 @@ #include #include #include 
+#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 88634fb0cc50b..35fafa2b91a80 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include "entry.h" diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 460dcfba7d4ec..89f9f63dca188 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -11,40 +11,21 @@ #include #include #include - -static int __save_address(void *data, unsigned long address, int nosched) -{ - struct stack_trace *trace = data; - - if (nosched && in_sched_functions(address)) - return 0; - if (trace->skip > 0) { - trace->skip--; - return 0; - } - if (trace->nr_entries < trace->max_entries) { - trace->entries[trace->nr_entries++] = address; - return 0; - } - return 1; -} - -static int save_address(void *data, unsigned long address, int reliable) -{ - return __save_address(data, address, 0); -} - -static int save_address_nosched(void *data, unsigned long address, int reliable) -{ - return __save_address(data, address, 1); -} +#include +#include void save_stack_trace(struct stack_trace *trace) { - unsigned long sp; + struct unwind_state state; - sp = current_stack_pointer(); - dump_trace(save_address, trace, NULL, sp); + unwind_for_each_frame(&state, current, NULL, 0) { + if (trace->nr_entries >= trace->max_entries) + break; + if (trace->skip > 0) + trace->skip--; + else + trace->entries[trace->nr_entries++] = state.ip; + } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -52,12 +33,18 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - unsigned long sp; + struct unwind_state state; - sp = tsk->thread.ksp; - if (tsk == current) - sp = current_stack_pointer(); - dump_trace(save_address_nosched, trace, tsk, sp); + unwind_for_each_frame(&state, tsk, NULL, 0) { + if (trace->nr_entries >= trace->max_entries) + break; + if (in_sched_functions(state.ip)) + continue; + if (trace->skip > 0) + trace->skip--; + else + trace->entries[trace->nr_entries++] = state.ip; + } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -65,10 +52,16 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) { - unsigned long sp; + struct unwind_state state; - sp = kernel_stack_pointer(regs); - dump_trace(save_address, trace, NULL, sp); + unwind_for_each_frame(&state, current, regs, 0) { + if (trace->nr_entries >= trace->max_entries) + break; + if (trace->skip > 0) + trace->skip--; + else + trace->entries[trace->nr_entries++] = state.ip; + } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c new file mode 100644 index 0000000000000..cf5a630f3aa98 --- /dev/null +++ b/arch/s390/kernel/unwind_bc.c @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + return __kernel_text_address(state->ip) ? 
state->ip : 0; +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +static bool outside_of_stack(struct unwind_state *state, unsigned long sp) +{ + return (sp <= state->sp) || + (sp + sizeof(struct stack_frame) > state->stack_info.end); +} + +static bool update_stack_info(struct unwind_state *state, unsigned long sp) +{ + struct stack_info *info = &state->stack_info; + unsigned long *mask = &state->stack_mask; + + /* New stack pointer leaves the current stack */ + if (get_stack_info(sp, state->task, info, mask) != 0 || + !on_stack(info, sp, sizeof(struct stack_frame))) + /* 'sp' does not point to a valid stack */ + return false; + return true; +} + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long sp, ip; + bool reliable; + + regs = state->regs; + if (unlikely(regs)) { + sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } + sf = (struct stack_frame *) sp; + ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + reliable = false; + regs = NULL; + } else { + sf = (struct stack_frame *) state->sp; + sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain); + if (likely(sp)) { + /* Non-zero back-chain points to the previous frame */ + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } + sf = (struct stack_frame *) sp; + ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + reliable = true; + } else { + /* No back-chain, look for a pt_regs structure */ + sp = state->sp + STACK_FRAME_OVERHEAD; + if (!on_stack(info, sp, sizeof(struct pt_regs))) + goto out_stop; + regs = (struct pt_regs *) sp; + if (user_mode(regs)) + goto out_stop; + ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + reliable = true; + } + } + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* Decode any ftrace redirection */ + if (ip == (unsigned long) return_to_handler) + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, + ip, NULL); +#endif + + /* Update unwind state */ + state->sp = sp; + state->ip = ip; + state->regs = regs; + state->reliable = reliable; + return true; + +out_err: + state->error = true; +out_stop: + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long sp) +{ + struct stack_info *info = &state->stack_info; + unsigned long *mask = &state->stack_mask; + struct stack_frame *sf; + unsigned long ip; + bool reliable; + + memset(state, 0, sizeof(*state)); + state->task = task; + state->regs = regs; + + /* Don't even attempt to start from user mode regs: */ + if (regs && user_mode(regs)) { + info->type = STACK_TYPE_UNKNOWN; + return; + } + + /* Get current stack pointer and initialize stack info */ + if (get_stack_info(sp, task, info, mask) != 0 || + !on_stack(info, sp, sizeof(struct stack_frame))) { + /* Something is wrong with the stack pointer */ + info->type = STACK_TYPE_UNKNOWN; + state->error = true; + return; + } + + /* Get the instruction pointer from pt_regs or the stack frame */ + if (regs) { + ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + reliable = true; + } else { + sf = (struct stack_frame *) sp; + ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + reliable = false; + } + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* Decode any ftrace redirection */ + 
if (ip == (unsigned long) return_to_handler) + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, + ip, NULL); +#endif + + /* Update unwind state */ + state->sp = sp; + state->ip = ip; + state->reliable = reliable; +} +EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 97b3ee53852b3..818deeb1ebc3d 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -16,6 +16,7 @@ #include #include #include +#include static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) { diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 43d9525c36fc3..7441857df51ba 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -13,23 +13,17 @@ #include #include #include - -static int __s390_backtrace(void *data, unsigned long address, int reliable) -{ - unsigned int *depth = data; - - if (*depth == 0) - return 1; - (*depth)--; - oprofile_add_trace(address); - return 0; -} +#include static void s390_backtrace(struct pt_regs *regs, unsigned int depth) { - if (user_mode(regs)) - return; - dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]); + struct unwind_state state; + + unwind_for_each_frame(&state, current, regs, 0) { + if (depth-- == 0) + break; + oprofile_add_trace(state.ip); + } } int __init oprofile_arch_init(struct oprofile_operations *ops) -- GitLab From ec7bf4789d95a0053bac0dfa36fbefd8cc584eea Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 18 Feb 2019 16:51:28 +0100 Subject: [PATCH 1823/1861] s390/ftrace: use HAVE_FUNCTION_GRAPH_RET_ADDR_PTR Make the call chain more reliable by tagging the ftrace stack entries with the stack pointer that is associated with the return address. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 2 ++ arch/s390/kernel/entry.h | 2 +- arch/s390/kernel/ftrace.c | 9 +++++---- arch/s390/kernel/mcount.S | 4 ++-- arch/s390/kernel/unwind_bc.c | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 6e0ed03387854..68d362f8d6c17 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -11,6 +11,8 @@ #define MCOUNT_RETURN_FIXUP 18 #endif +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + #ifndef __ASSEMBLY__ #ifdef CONFIG_CC_IS_CLANG diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index c3816ae108b08..20420c2b8a146 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -65,7 +65,7 @@ int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); int pfn_is_nosave(unsigned long); void s390_early_resume(void); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 39b13d71a8fe6..1bb85f60c0dd5 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -201,17 +201,18 @@ device_initcall(ftrace_plt_init); * Hook the return address and push it in the stack of return addresses * in current thread info. 
*/ -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) +unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp, + unsigned long ip) { if (unlikely(ftrace_graph_is_dead())) goto out; if (unlikely(atomic_read(&current->tracing_graph_pause))) goto out; ip -= MCOUNT_INSN_SIZE; - if (!function_graph_enter(parent, ip, 0, NULL)) - parent = (unsigned long) return_to_handler; + if (!function_graph_enter(ra, ip, 0, (void *) sp)) + ra = (unsigned long) return_to_handler; out: - return parent; + return ra; } NOKPROBE_SYMBOL(prepare_ftrace_return); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 09ae6da0aaa5f..9e1660a6b9db6 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -65,8 +65,8 @@ ENTRY(ftrace_caller) .globl ftrace_graph_caller ftrace_graph_caller: j ftrace_graph_caller_end - lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) - lg %r3,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15) + lg %r4,(STACK_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) ftrace_graph_caller_end: diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index cf5a630f3aa98..57fd4e902f1f4 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -84,7 +84,7 @@ bool unwind_next_frame(struct unwind_state *state) /* Decode any ftrace redirection */ if (ip == (unsigned long) return_to_handler) ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - ip, NULL); + ip, (void *) sp); #endif /* Update unwind state */ -- GitLab From 98587c2d894c34c9af5cd84ca169e1cd493aa692 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 30 Apr 2019 12:33:45 +0200 Subject: [PATCH 1824/1861] s390: simplify disabled_wait The disabled_wait() function uses its argument as the PSW address when it stops the CPU with a wait PSW that is disabled for interrupts. The different callers sometimes use a specific number like 0xdeadbeef to indicate a specific failure, the early boot code uses 0 and some other call sites use __builtin_return_address(0). At the time a dump is created the current PSW and the registers of a CPU are written to lowcore to make them available to the dump analysis tool. For a CPU stopped with disabled_wait the PSW and the registers do not really make sense together: the PSW address does not point to the function the registers belong to. Simplify disabled_wait() by using _THIS_IP_ for the PSW address and drop the argument to the function.
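For illustration, a sketch of the change at a typical call site (the
actual conversions follow in the diff below):

	/* before: every caller had to invent a PSW address */
	disabled_wait((unsigned long) __builtin_return_address(0));

	/* after: disabled_wait() uses _THIS_IP_ as the PSW address */
	disabled_wait();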
Signed-off-by: Martin Schwidefsky --- arch/s390/boot/als.c | 2 +- arch/s390/boot/startup.c | 2 +- arch/s390/include/asm/processor.h | 4 ++-- arch/s390/kernel/early.c | 4 ++-- arch/s390/kernel/early_nobss.c | 2 +- arch/s390/kernel/ipl.c | 4 ++-- arch/s390/kernel/machine_kexec.c | 4 ++-- arch/s390/kernel/nmi.c | 2 +- kernel/panic.c | 7 +------ 9 files changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index f902215e9cd9b..ff6801d401c44 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -99,7 +99,7 @@ static void facility_mismatch(void) print_machine_type(); print_missing_facilities(); sclp_early_printk("See Principles of Operations for facility bits\n"); - disabled_wait(0x8badcccc); + disabled_wait(); } void verify_facilities(void) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 4401e992bda1e..7b0d05414618b 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -57,7 +57,7 @@ void error(char *x) sclp_early_printk(x); sclp_early_printk("\n\n -- System halted"); - disabled_wait(0xdeadbeef); + disabled_wait(); } #ifdef CONFIG_KERNEL_UNCOMPRESSED diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9f2ff4a54aff1..b0fcbc37b637d 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -315,12 +315,12 @@ void enabled_wait(void); /* * Function to drop a processor into disabled wait state */ -static inline void __noreturn disabled_wait(unsigned long code) +static inline void __noreturn disabled_wait(void) { psw_t psw; psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; - psw.addr = code; + psw.addr = _THIS_IP_; __load_psw(psw); while (1); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 33f704c16bd33..629f173f60cd9 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -141,7 +141,7 @@ static void early_pgm_check_handler(void) addr = S390_lowcore.program_old_psw.addr; fixup = s390_search_extables(addr); if (!fixup) - disabled_wait(0); + disabled_wait(); /* Disable low address protection before storing into lowcore. */ __ctl_store(cr0, 0, 0); cr0_new = cr0 & ~(1UL << 28); @@ -298,7 +298,7 @@ static void __init check_image_bootable(void) sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); - disabled_wait(0xbadb007); + disabled_wait(); } void __init startup_init(void) diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c index 8d73f7fae16e0..52a3ef959341a 100644 --- a/arch/s390/kernel/early_nobss.c +++ b/arch/s390/kernel/early_nobss.c @@ -25,7 +25,7 @@ static void __init reset_tod_clock(void) return; /* TOD clock not running. Set the clock to Unix Epoch. 
*/ if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) - disabled_wait(0); + disabled_wait(); memset(tod_clock_base, 0, 16); *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index aa8fe768640e4..d836af3ccc382 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -920,7 +920,7 @@ static void __reipl_run(void *unused) case IPL_TYPE_FCP_DUMP: break; } - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); } static void reipl_run(struct shutdown_trigger *trigger) @@ -1375,7 +1375,7 @@ static void stop_run(struct shutdown_trigger *trigger) { if (strcmp(trigger->name, ON_PANIC_STR) == 0 || strcmp(trigger->name, ON_RESTART_STR) == 0) - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); smp_stop_cpu(); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 2f3a742a71a5d..8a1ae140c5e2c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -96,7 +96,7 @@ static void __do_machine_kdump(void *image) start_kdump(1); /* Die if start_kdump returns */ - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); } /* @@ -284,7 +284,7 @@ static void __do_machine_kexec(void *data) (*data_mover)(&image->head, image->start); /* Die if kexec returns */ - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); } /* diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 8c867b43c8ebc..0a487fae763ee 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -125,7 +125,7 @@ void nmi_free_per_cpu(struct lowcore *lc) static notrace void s390_handle_damage(void) { smp_emergency_stop(); - disabled_wait((unsigned long) __builtin_return_address(0)); + disabled_wait(); while (1); } NOKPROBE_SYMBOL(s390_handle_damage); diff --git a/kernel/panic.c b/kernel/panic.c index 0ae0d7332f12a..c1fcaad337b74 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -318,12 +318,7 @@ void panic(const char *fmt, ...) } #endif #if defined(CONFIG_S390) - { - unsigned long caller; - - caller = (unsigned long)__builtin_return_address(0); - disabled_wait(caller); - } + disabled_wait(); #endif pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); local_irq_enable(); -- GitLab From 2078e1e7f7e0e21bd0291908f3037c39e666d27b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 May 2019 08:29:42 -0600 Subject: [PATCH 1825/1861] PCI/LINK: Add Kconfig option (default off) e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") added dmesg logging whenever a link changes speed or width to a state that is considered degraded. Unfortunately, it cannot differentiate signal integrity-related link changes from those intentionally initiated by an endpoint driver, including drivers that may live in userspace or VMs when making use of vfio-pci. Some GPU drivers actively manage the link state to save power, which generates a stream of messages like this: vfio-pci 0000:07:00.0: 32.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s x16 link at 0000:00:02.0 (capable of 64.000 Gb/s with 5 GT/s x16 link) Since we can't distinguish the intentional changes from the signal integrity issues, leave the reporting turned off by default. Add a Kconfig option to turn it on if desired. 
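For illustration, a .config fragment that opts back in to the reporting
(a sketch; the Kconfig option itself is added in the diff below and
defaults to off):

	CONFIG_PCIEPORTBUS=y
	# Enable only if link speed/width changes on this system indicate
	# real signal integrity problems rather than driver-initiated ones.
	CONFIG_PCIE_BW=y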
Fixes: e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/Kconfig | 8 ++++++++ drivers/pci/pcie/Makefile | 2 +- drivers/pci/pcie/portdrv.h | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 5cbdbca904ac8..362eb8cfa53ba 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -142,3 +142,11 @@ config PCIE_PTM This is only useful if you have devices that support PTM, but it is safe to enable even if you don't. + +config PCIE_BW + bool "PCI Express Bandwidth Change Notification" + depends on PCIEPORTBUS + help + This enables PCI Express Bandwidth Change Notification. If + you know link width or rate changes occur only to correct + unreliable links, you may answer Y. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index f1d7bc1e5efae..efb9d2e71e9ee 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -3,7 +3,6 @@ # Makefile for PCI Express features and port driver pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o -pcieportdrv-y += bw_notification.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o @@ -13,3 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o obj-$(CONFIG_PCIE_PTM) += ptm.o +obj-$(CONFIG_PCIE_BW) += bw_notification.o diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 1d50dc58ac400..944827a8c7d36 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -49,7 +49,11 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +#ifdef CONFIG_PCIE_BW int pcie_bandwidth_notification_init(void); +#else +static inline int pcie_bandwidth_notification_init(void) { return 0; } +#endif /* Port Type */ #define PCIE_ANY_PORT (~0) -- GitLab From 6bac9bc273cdab6157ad7a2ead09400aabfc445b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 24 Apr 2019 18:16:32 +0200 Subject: [PATCH 1826/1861] i2c: designware: ratelimit 'transfer when suspended' errors There are two problems with dev_err() here. One: It is not ratelimited. Two: We don't see which driver tried to transfer something with a suspended adapter. Switch to dev_WARN_ONCE to fix both issues. Drawback is that we don't see if multiple drivers are trying to transfer while suspended. They need to be discovered one after the other now. This is better than a high CPU load because a really broken driver might try to resend endlessly. 
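For illustration, the before/after logging pattern (a sketch of the
hunk below):

	/* before: unratelimited dev_err(), no hint who the caller was */
	dev_err(dev->dev, "Error %s call while suspended\n", __func__);

	/* after: warn once, with a backtrace that identifies the caller */
	if (dev_WARN_ONCE(dev->dev, dev->suspended, "Transfer while suspended\n")) {
		ret = -ESHUTDOWN;
		goto done_nolock;
	}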
Link: https://bugs.archlinux.org/task/62391 Fixes: 275154155538 ("i2c: designware: Do not allow i2c_dw_xfer() calls while suspended") Signed-off-by: Wolfram Sang Reported-by: skidnik Acked-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Tested-by: skidnik Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index bb8e3f1499796..d464799e40a30 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -426,8 +426,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) pm_runtime_get_sync(dev->dev); - if (dev->suspended) { - dev_err(dev->dev, "Error %s call while suspended\n", __func__); + if (dev_WARN_ONCE(dev->dev, dev->suspended, "Transfer while suspended\n")) { ret = -ESHUTDOWN; goto done_nolock; } -- GitLab From 0e3b74e26280f2cf8753717a950b97d424da6046 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 2 May 2019 15:29:47 +0000 Subject: [PATCH 1827/1861] perf/x86/amd: Update generic hardware cache events for Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new amd_hw_cache_event_ids_f17h assignment structure set for AMD families 17h and above, since a lot has changed. Specifically: L1 Data Cache The data cache access counter remains the same on Family 17h. For DC misses, PMCx041's definition changes with Family 17h, so instead we use the L2 cache accesses from L1 data cache misses counter (PMCx060,umask=0xc8). For DC hardware prefetch events, Family 17h breaks compatibility for PMCx067 "Data Prefetcher", so instead, we use PMCx05a "Hardware Prefetch DC Fills." L1 Instruction Cache PMCs 0x80 and 0x81 (32-byte IC fetches and misses) are backward compatible on Family 17h. For prefetches, we remove the erroneous PMCx04B assignment which counts how many software data cache prefetch load instructions were dispatched. LL - Last Level Cache Removing PMCs 7D, 7E, and 7F assignments, as they do not exist on Family 17h, where the last level cache is L3. L3 counters can be accessed using the existing AMD Uncore driver. Data TLB On Intel machines, data TLB accesses ("dTLB-loads") are assigned to counters that count load/store instructions retired. This is inconsistent with instruction TLB accesses, where Intel implementations report iTLB misses that hit in the STLB. Ideally, dTLB-loads would count higher level dTLB misses that hit in lower level TLBs, and dTLB-load-misses would report those that also missed in those lower-level TLBs, therefore causing a page table walk. That would be consistent with instruction TLB operation, remove the redundancy between dTLB-loads and L1-dcache-loads, and prevent perf from producing artificially low percentage ratios, i.e. the "0.01%" below: 42,550,869 L1-dcache-loads 41,591,860 dTLB-loads 4,802 dTLB-load-misses # 0.01% of all dTLB cache hits 7,283,682 L1-dcache-stores 7,912,392 dTLB-stores 310 dTLB-store-misses On AMD Families prior to 17h, the "Data Cache Accesses" counter is used, which is slightly better than load/store instructions retired, but still counts in terms of individual load/store operations instead of TLB operations. 
So, for AMD Families 17h and higher, this patch assigns "dTLB-loads" to a counter for L1 dTLB misses that hit in the L2 dTLB, and "dTLB-load-misses" to a counter for L1 DTLB misses that caused L2 DTLB misses and therefore also caused page table walks. This results in a much more accurate view of data TLB performance: 60,961,781 L1-dcache-loads 4,601 dTLB-loads 963 dTLB-load-misses # 20.93% of all dTLB cache hits Note that for all AMD families, data loads and stores are combined in a single accesses counter, so no 'L1-dcache-stores' are reported separately, and stores are counted with loads in 'L1-dcache-loads'. Also note that the "% of all dTLB cache hits" string is misleading because (a) "dTLB cache": although TLBs can be considered caches for page tables, in this context, it can be misinterpreted as data cache hits because the figures are similar (at least on Intel), and (b) not all those loads (technically accesses) technically "hit" at that hardware level. "% of all dTLB accesses" would be more clear/accurate. Instruction TLB On Intel machines, 'iTLB-loads' measure iTLB misses that hit in the STLB, and 'iTLB-load-misses' measure iTLB misses that also missed in the STLB and completed a page table walk. For AMD Family 17h and above, for 'iTLB-loads' we replace the erroneous instruction cache fetches counter with PMCx084 "L1 ITLB Miss, L2 ITLB Hit". For 'iTLB-load-misses' we still use PMCx085 "L1 ITLB Miss, L2 ITLB Miss", but set a 0xff umask because without it the event does not get counted. Branch Predictor (BPU) PMCs 0xc2 and 0xc3 continue to be valid across all AMD Families. Node Level Events Family 17h does not have a PMCx0e9 counter, and corresponding counters have not been made available publicly, so for now, we mark them as unsupported for Families 17h and above. Reference: "Open-Source Register Reference For AMD Family 17h Processors Models 00h-2Fh" Released 7/17/2018, Publication #56255, Revision 3.03: https://www.amd.com/system/files/TechDocs/56255_OSRR.pdf [ mingo: tidied up the line breaks. ] Signed-off-by: Kim Phillips Cc: # v4.9+ Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pu Wen Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Thomas Lendacky Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Fixes: e40ed1542dd7 ("perf/x86: Add perf support for AMD family-17h processors") Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 111 ++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index d45f3fbd232ea..f15441b07dad8 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -116,6 +116,110 @@ static __initconst const u64 amd_hw_cache_event_ids }, }; +static __initconst const u64 amd_hw_cache_event_ids_f17h + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ + [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ + [C(RESULT_MISS)] = 0, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ + [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ + [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ + [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. 
*/ + [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = 0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + /* * AMD Performance Monitor K7 and later, up to and including Family 16h: */ @@ -865,9 +969,10 @@ __init int amd_pmu_init(void) x86_pmu.amd_nb_constraints = 0; } - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + if (boot_cpu_data.x86 >= 0x17) + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); + else + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); return 0; } -- GitLab From 1804569d87de903b4d746ba71512c3ed0a890d65 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 2 May 2019 15:58:37 +0000 Subject: [PATCH 1828/1861] MAINTAINERS: Include vendor specific files under arch/*/events/* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an explicit subdirectory specification for arch/x86/events/amd to the MAINTAINERS file, to distinguish it from its parent. This will produce the correct set of maintainers for the files found therein. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Borislav Petkov Cc: Gary Hook Cc: H. Peter Anvin Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pu Wen Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Thomas Lendacky Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Fixes: 39b0332a2158 ("perf/x86: Move perf_event_amd.c ........... => x86/events/amd/core.c") Signed-off-by: Ingo Molnar --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5c38f21aee787..3a15b6d7584e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12176,6 +12176,7 @@ F: arch/*/kernel/*/*/perf_event*.c F: arch/*/include/asm/perf_event.h F: arch/*/kernel/perf_callchain.c F: arch/*/events/* +F: arch/*/events/*/* F: tools/perf/ PERSONALITY HANDLING -- GitLab From fb31fbef9c35fbc4fc15cbf8bfafa96e5638d8d5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 25 Apr 2019 16:35:55 +0200 Subject: [PATCH 1829/1861] MAINTAINERS: friendly takeover of i2c-gpio driver I haven't heard from Haavard in years despite putting him to the CC list for i2c-gpio related mails. Since I was doing the work on this driver for a while now, let me take official maintainership, so it will be more clear to users. 
Signed-off-by: Wolfram Sang Acked-by: Haavard Skinnemoen Signed-off-by: Wolfram Sang --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 09f43f1bdd15e..50c643393af61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6461,7 +6461,7 @@ S: Maintained F: drivers/media/radio/radio-gemtek* GENERIC GPIO I2C DRIVER -M: Haavard Skinnemoen +M: Wolfram Sang S: Supported F: drivers/i2c/busses/i2c-gpio.c F: include/linux/platform_data/i2c-gpio.h -- GitLab From 95e0cf3caeb11e1b0398c747b5cfa12828263824 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 30 Apr 2019 11:47:34 +0200 Subject: [PATCH 1830/1861] i2c: synquacer: fix enumeration of slave devices The I2C host driver for SynQuacer fails to populate the of_node and ACPI companion fields of the struct i2c_adapter it instantiates, resulting in enumeration of the subordinate I2C bus to fail. Fixes: 0d676a6c4390 ("i2c: add support for Socionext SynQuacer I2C controller") Cc: # v4.19+ Signed-off-by: Ard Biesheuvel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-synquacer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index d18b0941b71a4..f14d4b3fab446 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -597,6 +597,8 @@ static int synquacer_i2c_probe(struct platform_device *pdev) i2c->adapter = synquacer_i2c_ops; i2c_set_adapdata(&i2c->adapter, i2c); i2c->adapter.dev.parent = &pdev->dev; + i2c->adapter.dev.of_node = pdev->dev.of_node; + ACPI_COMPANION_SET(&i2c->adapter.dev, ACPI_COMPANION(&pdev->dev)); i2c->adapter.nr = pdev->id; init_completion(&i2c->completion); -- GitLab From 72bfcee11cf89509795c56b0e40a3785ab00bbdd Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 30 Apr 2019 17:23:22 +0300 Subject: [PATCH 1831/1861] i2c: Prevent runtime suspend of adapter when Host Notify is required Multiple users have reported their Synaptics touchpad has stopped working between v4.20.1 and v4.20.2 when using SMBus interface. The culprit for this appeared to be commit c5eb1190074c ("PCI / PM: Allow runtime PM without callback functions") that fixed the runtime PM for i2c-i801 SMBus adapter. Those Synaptics touchpad are using i2c-i801 for SMBus communication and testing showed they are able to get back working by preventing the runtime suspend of adapter. Normally when i2c-i801 SMBus adapter transmits with the client it resumes before operation and autosuspends after. However, if client requires SMBus Host Notify protocol, what those Synaptics touchpads do, then the host adapter must not go to runtime suspend since then it cannot process incoming SMBus Host Notify commands the client may send. Fix this by keeping I2C/SMBus adapter active in case client requires Host Notify. Reported-by: Keijo Vaara Link: https://bugzilla.kernel.org/show_bug.cgi?id=203297 Fixes: c5eb1190074c ("PCI / PM: Allow runtime PM without callback functions") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Jarkko Nikula Acked-by: Rafael J. 
Wysocki Tested-by: Keijo Vaara Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 38af18645133c..8149c9e32b697 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -327,6 +327,8 @@ static int i2c_device_probe(struct device *dev) if (client->flags & I2C_CLIENT_HOST_NOTIFY) { dev_dbg(dev, "Using Host Notify IRQ\n"); + /* Keep adapter active when Host Notify is required */ + pm_runtime_get_sync(&client->adapter->dev); irq = i2c_smbus_host_notify_to_irq(client); } else if (dev->of_node) { irq = of_irq_get_byname(dev->of_node, "irq"); @@ -431,6 +433,8 @@ static int i2c_device_remove(struct device *dev) device_init_wakeup(&client->dev, false); client->irq = client->init_irq; + if (client->flags & I2C_CLIENT_HOST_NOTIFY) + pm_runtime_put(&client->adapter->dev); return status; } -- GitLab From 2e712675ffd1331bb527dfc851b0e98cd684c2f1 Mon Sep 17 00:00:00 2001 From: Bo YU Date: Mon, 22 Apr 2019 04:01:38 -0400 Subject: [PATCH 1832/1861] perf bpf: Return value with unlocking in perf_env__find_btf() In perf_env__find_btf(), we're returning without unlocking "env->bpf_progs.lock". This may cause a lockdep issue. Detected by Coverity Scan, CID# 1444762:(program hangs(LOCK)) Signed-off-by: Bo YU Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Fixes: 2db7b1e0bd49d: (perf bpf: Return NULL when RB tree lookup fails in perf_env__find_btf()) Link: http://lkml.kernel.org/r/20190422080138.10088-1-tsu.yubo@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 9494f9dc61eca..6a3eaf7d9353c 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -115,8 +115,8 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) } node = NULL; - up_read(&env->bpf_progs.lock); out: + up_read(&env->bpf_progs.lock); return node; } -- GitLab From 24e45b49eef07814e0507507161cd06f15b8ee1b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Apr 2019 11:54:50 -0300 Subject: [PATCH 1833/1861] tools uapi x86: Sync vmx.h with the kernel To pick up the changes from: 2b27924bb1d4 ("KVM: nVMX: always use early vmcs check when EPT is disabled") That causes this object in the tools/perf build process to be rebuilt: CC /tmp/build/perf/arch/x86/util/kvm-stat.o But it isn't using VMX_ABORT_ prefixed constants, so no change in behaviour.
This silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Link: https://lkml.kernel.org/n/tip-bjbo3zc0r8i8oa0udpvftya6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd3982..d213ec5c3766d 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -146,6 +146,7 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 +#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ -- GitLab From 01e985e900d3e602e9b1a55372a8e5274012a417 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 3 Apr 2019 16:44:52 -0300 Subject: [PATCH 1834/1861] perf annotate: Fix build on 32 bit for BPF annotation Commit 6987561c9e86 ("perf annotate: Enable annotation of BPF programs") adds support for BPF programs annotations but the new code does not build on 32-bit. Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Song Liu Fixes: 6987561c9e86 ("perf annotate: Enable annotation of BPF programs") Link: http://lkml.kernel.org/r/20190403194452.10845-1-cascardo@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c8b01176c9e16..09762985c7137 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1714,8 +1714,8 @@ static int symbol__disassemble_bpf(struct symbol *sym, if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) return -1; - pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__, - sym->name, sym->start, sym->end - sym->start); + pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, + sym->name, sym->start, sym->end - sym->start); memset(tpath, 0, sizeof(tpath)); perf_exe(tpath, sizeof(tpath)); @@ -1740,7 +1740,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, info_linear = info_node->info_linear; sub_id = dso->bpf_prog.sub_id; - info.buffer = (void *)(info_linear->info.jited_prog_insns); + info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns); info.buffer_length = info_linear->info.jited_prog_len; if (info_linear->info.nr_line_info) @@ -1776,7 +1776,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, const char *srcline; u64 addr; - addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id]; + addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id]; count = disassemble(pc, &info); if (prog_linfo) -- GitLab From 5f05182fab9a29fea6c4ab8113be45adf0c11bf0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 24 Apr 2019 09:38:02 +0800 Subject: [PATCH 1835/1861] tools lib traceevent: Change tag string for error The traceevent lib is used by the perf tool, and when executing perf test -v 6 it outputs error log on the ARM64 platform: running test 33 '*:*'trace-cmd: No such file or directory [...] 
trace-cmd: Invalid argument The trace event parsing code originally came from trace-cmd so it keeps the tag string "trace-cmd" for errors; this easily introduces the impression that the perf tool launches the trace-cmd command for trace event parsing, but in fact the related parsing is accomplished by the traceevent lib. This patch changes the tag string to "libtraceevent" so that we can avoid confusion and let users more easily connect the error with the traceevent lib. Signed-off-by: Leo Yan Acked-by: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190424013802.27569-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c index 77e4ec6402dd3..e99867111387f 100644 --- a/tools/lib/traceevent/parse-utils.c +++ b/tools/lib/traceevent/parse-utils.c @@ -14,7 +14,7 @@ void __vwarning(const char *fmt, va_list ap) { if (errno) - perror("trace-cmd"); + perror("libtraceevent"); errno = 0; fprintf(stderr, " "); -- GitLab From bf561d3c13423fc54daa19b5d49dc15fafdb7acc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Apr 2019 18:36:51 -0300 Subject: [PATCH 1836/1861] perf bench numa: Add define for RUSAGE_THREAD if not present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While cross building perf to the ARC architecture on a fedora 30 host, we were failing with: CC /tmp/build/perf/bench/numa.o bench/numa.c: In function ‘worker_thread’: bench/numa.c:1261:12: error: ‘RUSAGE_THREAD’ undeclared (first use in this function); did you mean ‘SIGEV_THREAD’? getrusage(RUSAGE_THREAD, &rusage); ^~~~~~~~~~~~~ SIGEV_THREAD bench/numa.c:1261:12: note: each undeclared identifier is reported only once for each function it appears in [perfbuilder@60d5802468f6 perf]$ /arc_gnu_2019.03-rc1_prebuilt_uclibc_le_archs_linux_install/bin/arc-linux-gcc --version | head -1 arc-linux-gcc (ARCv2 ISA Linux uClibc toolchain 2019.03-rc1) 8.3.1 20190225 [perfbuilder@60d5802468f6 perf]$ Trying to reproduce a report by Vineet, I noticed that, with just cross-built zlib and numactl libraries, I ended up with the above failure. So, since RUSAGE_THREAD is available as a define in the system headers, check if it is defined in the 'perf bench numa' sources and define it if not. Now it builds and I have to figure out if the problem reported by Vineet only takes place if we have libelf or some other library available.
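For reference, a sketch of the guard this patch adds (the actual hunk
is in the diff below; 1 is RUSAGE_THREAD's value in the Linux UAPI
headers):

	/* Fall back to the Linux value when the C library headers,
	 * e.g. the uClibc ones used here, do not provide the define. */
	#ifndef RUSAGE_THREAD
	# define RUSAGE_THREAD 1
	#endif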
Cc: Arnd Bergmann Cc: Jiri Olsa Cc: linux-snps-arc@lists.infradead.org Cc: Namhyung Kim Cc: Vineet Gupta Link: https://lkml.kernel.org/n/tip-2wb4r1gir9xrevbpq7qp0amk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 98ad783efc69d..a7784554a80de 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -39,6 +39,10 @@ #include #include +#ifndef RUSAGE_THREAD +# define RUSAGE_THREAD 1 +#endif + /* * Regular printout to the terminal, supressed if -q is specified: */ -- GitLab From 167e418fa0871c083e2c74508d73012abb01e6f7 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 23 Apr 2019 12:53:03 +0200 Subject: [PATCH 1837/1861] perf report: Report OOM in status line in the GTK UI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An -ENOMEM error is not reported in the GTK GUI. Instead this error message pops up on the screen: [root@m35lp76 perf]# ./perf report -i perf.data.error68-1 Processing events... [974K/3M] Error:failed to process sample 0xf4198 [0x8]: failed to process type: 68 However when I use the same perf.data file with --stdio it works: [root@m35lp76 perf]# ./perf report -i perf.data.error68-1 --stdio \ | head -12 # Total Lost Samples: 0 # # Samples: 76K of event 'cycles' # Event count (approx.): 99056160000 # # Overhead Command Shared Object Symbol # ........ ............... ................. ......... # 8.81% find [kernel.kallsyms] [k] ftrace_likely_update 8.74% swapper [kernel.kallsyms] [k] ftrace_likely_update 8.34% sshd [kernel.kallsyms] [k] ftrace_likely_update 2.19% kworker/u512:1- [kernel.kallsyms] [k] ftrace_likely_update The sample percentage is a bit low... The GUI always fails in the FINISHED_ROUND event (68) and does not indicate the reason why. What happened is the following. Perf report calls a lot of functions, and deep down, when a FINISHED_ROUND event is processed, these functions are called: perf_session__process_event() + perf_session__process_user_event() + process_finished_round() + ordered_events__flush() + __ordered_events__flush() + do_flush() + ordered_events__deliver_event() + perf_session__deliver_event() + machine__deliver_event() + perf_evlist__deliver_event() + process_sample_event() + hist_entry_iter_add() --> only called in GUI case!!! + hist_iter__report__callback() + symbol__inc_addr_sample() Now this function runs out of memory and returns -ENOMEM. This is reported all the way up until function perf_session__process_event() returns to its caller, where -ENOMEM is changed to -EINVAL and processing stops: if ((skip = perf_session__process_event(session, event, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; goto out_err; } This occurred in the FINISHED_ROUND event when it had to process some 10000 entries and ran out of memory. This patch indicates the root cause and displays it in the status line of the perf report GUI.
Output before (on GUI status line): 0xf4198 [0x8]: failed to process type: 68 Output after: 0xf4198 [0x8]: failed to process type: 68 [not enough memory] Committer notes: the 'skip' variable needs to be initialized to -EINVAL, so that when the size is less than sizeof(struct perf_event_header) we avoid this valid compiler warning: util/session.c: In function ‘perf_session__process_events’: util/session.c:1936:7: error: ‘skip’ may be used uninitialized in this function [-Werror=maybe-uninitialized] err = skip; ~~~~^~~~~~ util/session.c:1874:6: note: ‘skip’ was declared here s64 skip; ^~~~ cc1: all warnings being treated as errors Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Reviewed-by: Jiri Olsa Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20190423105303.61683-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b17f1c9bc9651..bad5f87ae001b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1928,12 +1928,14 @@ more: size = event->header.size; + skip = -EINVAL; + if (size < sizeof(struct perf_event_header) || (skip = rd->process(session, event, file_pos)) < 0) { - pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n", file_offset + head, event->header.size, - event->header.type); - err = -EINVAL; + event->header.type, strerror(-skip)); + err = skip; goto out; } -- GitLab From cf0c37b6dbf74fb71bea07b516612d29e00dcbc4 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 28 Apr 2019 16:32:28 +0800 Subject: [PATCH 1838/1861] perf cs-etm: Don't check cs_etm_queue::prev_packet validity Since cs_etm_queue::prev_packet is allocated for all cases, it will never be a NULL pointer; validity checking of prev_packet is now pointless, so remove all such checks. Signed-off-by: Leo Yan Tested-by: Robert Walker Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190428083228.20246-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 110804936fc3f..7777cfc1ad8c4 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -981,7 +981,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) * PREV_PACKET is a branch.
*/ if (etm->synth_opts.last_branch && - etmq->prev_packet && etmq->prev_packet->sample_type == CS_ETM_RANGE && etmq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq); @@ -1014,7 +1013,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) etmq->period_instructions = instrs_over; } - if (etm->sample_branches && etmq->prev_packet) { + if (etm->sample_branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ @@ -1071,9 +1070,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq) struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; - if (!etmq->prev_packet) - return 0; - /* Handle start tracing packet */ if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) goto swap_packet; -- GitLab From 35bb59c10a6d0578806dd500477dae9cb4be344e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 28 Apr 2019 16:32:27 +0800 Subject: [PATCH 1839/1861] perf cs-etm: Always allocate memory for cs_etm_queue::prev_packet Robert Walker reported that a segmentation fault is observed when processing CoreSight trace data; this issue can be easily reproduced by the command 'perf report --itrace=i1000i' for decoding tracing data. If neither the 'b' flag (synthesize branch events) nor the 'l' flag (synthesize last branch entries) is specified to option '--itrace', cs_etm_queue::prev_packet will not be initialised. After merging the code to support exception packets and sample flags, a number of uses of cs_etm_queue::prev_packet were introduced without checking whether it is valid; in these cases any access to the uninitialised prev_packet will cause a crash. As cs_etm_queue::prev_packet is used more widely now and it's already hard to follow which functions have been called in a context where the validity of cs_etm_queue::prev_packet has been checked, this patch always allocates memory for cs_etm_queue::prev_packet.
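The shape of the fix is the usual "allocate unconditionally so no consumer needs a validity check" simplification. A condensed user-space sketch of the same idea (all names here are illustrative; this is not the cs-etm code itself):

  #include <stdlib.h>

  struct packet { int sample_type; };

  struct queue {
  	struct packet *packet;
  	struct packet *prev_packet;
  };

  static struct queue *queue_alloc(void)
  {
  	struct queue *q = calloc(1, sizeof(*q));

  	if (!q)
  		return NULL;
  	q->packet = calloc(1, sizeof(*q->packet));
  	/* allocated for every mode, so callers can drop their NULL checks */
  	q->prev_packet = calloc(1, sizeof(*q->prev_packet));
  	if (!q->packet || !q->prev_packet) {
  		free(q->packet);
  		free(q->prev_packet);
  		free(q);
  		return NULL;
  	}
  	return q;
  }

The small cost of one always-present allocation buys the guarantee that every later dereference of prev_packet is safe, regardless of which synthesis options were requested.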
Reported-by: Robert Walker Suggested-by: Robert Walker Signed-off-by: Leo Yan Tested-by: Robert Walker Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Fixes: 7100b12cf474 ("perf cs-etm: Generate branch sample for exception packet") Fixes: 24fff5eb2b93 ("perf cs-etm: Avoid stale branch samples when flush packet") Link: http://lkml.kernel.org/r/20190428083228.20246-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 7777cfc1ad8c4..de488b43f440f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -422,11 +422,9 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) if (!etmq->packet) goto out_free; - if (etm->synth_opts.last_branch || etm->sample_branches) { - etmq->prev_packet = zalloc(szp); - if (!etmq->prev_packet) - goto out_free; - } + etmq->prev_packet = zalloc(szp); + if (!etmq->prev_packet) + goto out_free; if (etm->synth_opts.last_branch) { size_t sz = sizeof(struct branch_stack); -- GitLab From c638417e1a64b1f43ebab589e697d1cd1a127a74 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 May 2019 16:27:00 -0400 Subject: [PATCH 1840/1861] tools build: Add -ldl to the disassembler-four-args feature test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thomas Backlund reported that the perf build was failing on the Mageia 7 distro; the feature test output shows: cat /tmp/build/perf/feature/test-disassembler-four-args.make.output /usr/bin/ld: /usr/lib64/libbfd.a(plugin.o): in function `try_load_plugin': /home/iurt/rpmbuild/BUILD/binutils-2.32/objs/bfd/../../bfd/plugin.c:243: undefined reference to `dlopen' /usr/bin/ld: /home/iurt/rpmbuild/BUILD/binutils-2.32/objs/bfd/../../bfd/plugin.c:271: undefined reference to `dlsym' /usr/bin/ld: /home/iurt/rpmbuild/BUILD/binutils-2.32/objs/bfd/../../bfd/plugin.c:256: undefined reference to `dlclose' /usr/bin/ld: /home/iurt/rpmbuild/BUILD/binutils-2.32/objs/bfd/../../bfd/plugin.c:246: undefined reference to `dlerror', as we allow dynamic linking and loading. Mageia 7 uses these linker flags: $ rpm --eval %ldflags -Wl,--as-needed -Wl,--no-undefined -Wl,-z,relro -Wl,-O1 -Wl,--build-id -Wl,--enable-new-dtags So add -ldl to this feature LDFLAGS.
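The undefined dl* symbols come from libbfd's plugin loader, so any program statically linked against such a libbfd inherits the libdl dependency. A minimal reproducer of the same link rule, independent of perf, assuming a glibc older than 2.34 (where dlopen() still lives in libdl rather than libc):

  #include <dlfcn.h>

  int main(void)
  {
  	/* any shared object name will do for the demonstration */
  	void *handle = dlopen("libz.so.1", RTLD_NOW);

  	if (!handle)
  		return 1;
  	dlclose(handle);
  	return 0;
  }

On such systems linking with plain 'cc test.c' fails with "undefined reference to `dlopen'", while 'cc test.c -ldl' succeeds; appending -ldl to the feature test's LDFLAGS has exactly that effect for the libbfd-internal dl* calls.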
Reported-by: Thomas Backlund Tested-by: Thomas Backlund Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Link: https://lkml.kernel.org/r/20190501173158.GC21436@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index fe3f97e342fae..6d65874e16c3d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -227,7 +227,7 @@ FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt -FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes +FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 -- GitLab From 18f90d372cf35b387663f1567de701e5393f6eb5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Apr 2019 15:21:35 -0300 Subject: [PATCH 1841/1861] tools arch uapi: Copy missing unistd.h headers for arc, hexagon and riscv Those were needed since: c8ce48f06503 ("asm-generic: Make time32 syscall numbers optional") But when the asm-generic/unistd.h was sync'ed with tools/ in: 1a787fc5ba18 ("tools headers uapi: Sync copy of asm-generic/unistd.h with the kernel sources"), I forgot to copy the files for the architectures that define __ARCH_WANT_TIME32_SYSCALLS, so the perf build was breaking there, as reported by Vineet Gupta for the ARC architecture. After updating my ARC container to use the glibc based toolchain + cross building libnuma, zlib and elfutils, I finally managed to reproduce the problem and verify that this now is fixed and will not regress, as this will be tested before each pull request sent upstream. Reported-by: Vineet Gupta Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Jiri Olsa Cc: linux-snps-arc@lists.infradead.org Cc: Namhyung Kim Link: https://lkml.kernel.org/r/20190426193531.GC28586@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arc/include/uapi/asm/unistd.h | 51 ++++++++++++++++++++ tools/arch/hexagon/include/uapi/asm/unistd.h | 40 +++++++++++++++ tools/arch/riscv/include/uapi/asm/unistd.h | 42 ++++++++++++++++ 3 files changed, 133 insertions(+) create mode 100644 tools/arch/arc/include/uapi/asm/unistd.h create mode 100644 tools/arch/hexagon/include/uapi/asm/unistd.h create mode 100644 tools/arch/riscv/include/uapi/asm/unistd.h diff --git a/tools/arch/arc/include/uapi/asm/unistd.h b/tools/arch/arc/include/uapi/asm/unistd.h new file mode 100644 index 0000000000000..5eafa11151623 --- /dev/null +++ b/tools/arch/arc/include/uapi/asm/unistd.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ + +/******** no-legacy-syscalls-ABI *******/ + +/* + * Non-typical guard macro to enable inclusion twice in ARCH sys.c + * That is how the Generic syscall wrapper generator works + */ +#if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL) +#define _UAPI_ASM_ARC_UNISTD_H + +#define __ARCH_WANT_RENAMEAT +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_SYS_EXECVE +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_TIME32_SYSCALLS + +#define sys_mmap2 sys_mmap_pgoff + +#include + +#define NR_syscalls __NR_syscalls + +/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ +#define __NR_sysfs (__NR_arch_specific_syscall + 3) + +/* ARC specific syscall */ +#define __NR_cacheflush (__NR_arch_specific_syscall + 0) +#define __NR_arc_settls (__NR_arch_specific_syscall + 1) +#define __NR_arc_gettls (__NR_arch_specific_syscall + 2) +#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4) + +__SYSCALL(__NR_cacheflush, sys_cacheflush) +__SYSCALL(__NR_arc_settls, sys_arc_settls) +__SYSCALL(__NR_arc_gettls, sys_arc_gettls) +__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg) +__SYSCALL(__NR_sysfs, sys_sysfs) + +#undef __SYSCALL + +#endif diff --git a/tools/arch/hexagon/include/uapi/asm/unistd.h b/tools/arch/hexagon/include/uapi/asm/unistd.h new file mode 100644 index 0000000000000..432c4db1b6239 --- /dev/null +++ b/tools/arch/hexagon/include/uapi/asm/unistd.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Syscall support for Hexagon + * + * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/* + * The kernel pulls this unistd.h in three different ways: + * 1. the "normal" way which gets all the __NR defines + * 2. with __SYSCALL defined to produce function declarations + * 3. with __SYSCALL defined to produce syscall table initialization + * See also: syscalltab.c + */ + +#define sys_mmap2 sys_mmap_pgoff +#define __ARCH_WANT_RENAMEAT +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_SYS_EXECVE +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_TIME32_SYSCALLS + +#include diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h new file mode 100644 index 0000000000000..0e2eeeb1fd27b --- /dev/null +++ b/tools/arch/riscv/include/uapi/asm/unistd.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 David Abdurachmanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifdef __LP64__ +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SET_GET_RLIMIT +#endif /* __LP64__ */ + +#include + +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * __NR_riscv_flush_icache is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +#ifndef __NR_riscv_flush_icache +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +#endif +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) -- GitLab From 7e221b811f1472d0c58c7d4e0fe84fcacd22580a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 May 2019 09:26:23 -0400 Subject: [PATCH 1842/1861] perf tools: Remove needless asm/unistd.h include fixing build in some places We were including both sys/syscall.h and asm/unistd.h; since sys/syscall.h already includes asm/unistd.h, this sometimes leads to the redefinition of defines, breaking the build. Noticed on ARC with uCLibc. Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Arnd Bergmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Rich Felker Cc: Vineet Gupta Link: https://lkml.kernel.org/n/tip-xjpf80o64i2ko74aj2jih0qg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cloexec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index ca0fff6272be4..06f48312c5ed0 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,7 +7,6 @@ #include "asm/bug.h" #include "debug.h" #include -#include #include static unsigned long flag = PERF_FLAG_FD_CLOEXEC; -- GitLab From 26ae4f4406f88d82d79c85c11ac5fae18213cd38 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 3 May 2019 11:55:35 +0300 Subject: [PATCH 1843/1861] perf/ring_buffer: Fix AUX software double buffering This recent commit: 5768402fd9c6e87 ("perf/ring_buffer: Use high order allocations for AUX buffers optimistically") overlooked the fact that the previous one-page granularity of the AUX buffer provided an implicit double buffering capability to the PMU driver, which went away when the entire buffer became one high-order page. Always make the full-trace mode AUX allocation at least two-part to preserve the previous behavior and allow the implicit double buffering to continue.
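The behavior being preserved is plain ping-pong buffering: with at least two chunks, the PMU can fill one while software drains the other. A generic sketch of the scheme (illustrative only; this is not the ring_buffer.c code, whose chunk bookkeeping is per-page/high-order):

  #include <stddef.h>

  #define NCHUNKS 2	/* the minimum that allows overlap */

  struct aux_ring {
  	unsigned char *chunk[NCHUNKS];
  	size_t chunk_size;
  	int hw_idx;	/* chunk currently owned by the hardware */
  };

  static void consume(unsigned char *buf, size_t len)
  {
  	(void)buf;
  	(void)len;	/* copy out / decode the trace data here */
  }

  /* called when the hardware signals that its current chunk is full */
  static void aux_chunk_done(struct aux_ring *r)
  {
  	int done = r->hw_idx;

  	/* hand the other chunk to the hardware first ... */
  	r->hw_idx = (r->hw_idx + 1) % NCHUNKS;
  	/* ... then drain the full one; with a single chunk the
  	 * hardware would have to stall while this runs */
  	consume(r->chunk[done], r->chunk_size);
  }

With one single high-order chunk the two steps cannot overlap, which is exactly the implicit capability the commit above had removed and this patch restores.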
Reported-by: Ammy Yi Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Fixes: 5768402fd9c6e87 ("perf/ring_buffer: Use high order allocations for AUX buffers optimistically") Link: http://lkml.kernel.org/r/20190503085536.24119-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 5eedb49a65ea2..674b353834914 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -610,8 +610,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, * PMU requests more than one contiguous chunks of memory * for SW double buffering */ - if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) && - !overwrite) { + if (!overwrite) { if (!max_order) return -EINVAL; -- GitLab From 72e830f68428ab9ea9eca65d160795f4e02cecfc Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 3 May 2019 11:55:36 +0300 Subject: [PATCH 1844/1861] perf/x86/intel/pt: Remove software double buffering PMU capability Now that all AUX allocations are high-order by default, the software double buffering PMU capability doesn't make sense any more, get rid of it. In case some PMUs choose to opt out, we can re-introduce it. Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Link: http://lkml.kernel.org/r/20190503085536.24119-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 3 +-- include/linux/perf_event.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index fb3a2f13fc709..339d7628080cf 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1525,8 +1525,7 @@ static __init int pt_init(void) } if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) - pt_pmu.pmu.capabilities = - PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF; + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; pt_pmu.pmu.attr_groups = pt_attr_groups; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e47ef764f613e..1f678f0238505 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -240,7 +240,6 @@ struct perf_event; #define PERF_PMU_CAP_NO_INTERRUPT 0x01 #define PERF_PMU_CAP_NO_NMI 0x02 #define PERF_PMU_CAP_AUX_NO_SG 0x04 -#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 -- GitLab From 77a5352ba977d2554643e3797e10823d0d03dcf7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:44 +1000 Subject: [PATCH 1845/1861] sched/core: Allow the remote scheduler tick to be started on CPU0 This has no effect yet because CPU0 will always be a housekeeping CPU until a later change. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . 
Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-2-npiggin@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index de8ab411826ce..cef22c5499a82 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5866,7 +5866,7 @@ void __init sched_init_smp(void) static int __init migration_init(void) { - sched_rq_cpu_starting(smp_processor_id()); + sched_cpu_starting(smp_processor_id()); return 0; } early_initcall(migration_init); -- GitLab From c263a4e990b7296b074e33aa077239a0a28a818e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Apr 2019 10:35:51 +0200 Subject: [PATCH 1846/1861] s390: only build for new CPUs with clang llvm does not understand -march=z9-109 and older target specifiers, so disable the respective Kconfig settings and the logic to make the boot code work on old systems when building with clang. Part of the early boot code is normally compiled with -march=z900 for maximum compatibility. This also has to get changed with clang to the oldest supported ISA, which is -march=z10 here. Signed-off-by: Arnd Bergmann Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 6 ++++++ arch/s390/boot/Makefile | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8c15392ee9854..380d13df4411c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -241,6 +241,7 @@ choice config MARCH_Z900 bool "IBM zSeries model z800 and z900" + depends on !CC_IS_CLANG select HAVE_MARCH_Z900_FEATURES help Select this to enable optimizations for model z800/z900 (2064 and @@ -249,6 +250,7 @@ config MARCH_Z900 config MARCH_Z990 bool "IBM zSeries model z890 and z990" + depends on !CC_IS_CLANG select HAVE_MARCH_Z990_FEATURES help Select this to enable optimizations for model z890/z990 (2084 and @@ -257,6 +259,7 @@ config MARCH_Z990 config MARCH_Z9_109 bool "IBM System z9" + depends on !CC_IS_CLANG select HAVE_MARCH_Z9_109_FEATURES help Select this to enable optimizations for IBM System z9 (2094 and @@ -348,12 +351,15 @@ config TUNE_DEFAULT config TUNE_Z900 bool "IBM zSeries model z800 and z900" + depends on !CC_IS_CLANG config TUNE_Z990 bool "IBM zSeries model z890 and z990" + depends on !CC_IS_CLANG config TUNE_Z9_109 bool "IBM System z9" + depends on !CC_IS_CLANG config TUNE_Z10 bool "IBM System z10" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index cb40317dc3f73..c51496bbac190 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -12,18 +12,24 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) # -# Use -march=z900 for als.c to be able to print an error +# Use minimum architecture for als.c to be able to print an error # message if the kernel is started on a machine which is too old # -ifneq ($(CC_FLAGS_MARCH),-march=z900) +ifndef CONFIG_CC_IS_CLANG +CC_FLAGS_MARCH_MINIMUM := -march=z900 +else +CC_FLAGS_MARCH_MINIMUM := -march=z10 +endif + +ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM)) AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += -march=z900 +AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM) AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) -AFLAGS_mem.o += -march=z900 +AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM) CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_als.o += -march=z900 +CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += -march=z900 +CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM) endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -- GitLab From 96fb54a180894dc3c6821aa3297a3eb8b27f03c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Apr 2019 10:35:52 +0200 Subject: [PATCH 1847/1861] s390: boot, purgatory: pass $(CLANG_FLAGS) where needed The purgatory and boot Makefiles do not inherit the original cflags, so clang falls back to the default target architecture when building them; typically this would be x86 when cross-compiling. Add $(CLANG_FLAGS) everywhere so we pass the correct --target=s390x-linux option when cross-compiling. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 4 ++-- arch/s390/purgatory/Makefile | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 54b8a12d64e86..df1d6a150f300 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -21,9 +21,9 @@ KBUILD_CFLAGS += -fPIE LDFLAGS_vmlinux := -pie endif aflags_dwarf := -Wa,-gdwarf-2 -KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ +KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) -KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index b20934d919863..dc1ae4ff79d7a 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -20,6 +20,7 @@ KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common +KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) -- GitLab From 964d06b4ed212d85ff589a9438baeb6c4fff3272 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Apr 2019 10:35:53 +0200 Subject: [PATCH 1848/1861] s390: drop CONFIG_VIRT_TO_BUS VIRT_TO_BUS is only used for legacy PCI and ISA device drivers using virt_to_bus() instead of the streaming DMA mapping API, and the remaining drivers generally don't work on 64-bit architectures. Two of these drivers also cause a build warning on s390, so instead of trying to fix that, let's just disable the option as we do on most architectures now.
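For context, virt_to_bus() only translates an address; the streaming DMA API that replaced it also programs any IOMMU and handles the cache ownership handoff. A hedged kernel-style sketch of the two patterns (the device, its dev_regs base and the DMA_ADDR register offset are hypothetical, and the legacy line is shown only as a comment):

  #include <linux/dma-mapping.h>
  #include <linux/io.h>
  #include <linux/kernel.h>

  #define DMA_ADDR 0x10	/* hypothetical device register offset */

  static int start_tx(struct device *dev, void __iomem *dev_regs,
  		    void *buf, size_t size)
  {
  	dma_addr_t dma;

  	/* legacy, CONFIG_VIRT_TO_BUS era (no IOMMU, no cache handoff):
  	 *	writel(virt_to_bus(buf), dev_regs + DMA_ADDR);
  	 */
  	dma = dma_map_single(dev, buf, size, DMA_TO_DEVICE);
  	if (dma_mapping_error(dev, dma))
  		return -ENOMEM;
  	writel(lower_32_bits(dma), dev_regs + DMA_ADDR);
  	return 0;
  }

A matching dma_unmap_single() after the transfer completes returns buffer ownership to the CPU, which the legacy interface had no notion of; that gap is why drivers still relying on virt_to_bus() tend to break on 64-bit and IOMMU-equipped systems.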
Signed-off-by: Arnd Bergmann Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 380d13df4411c..0ad0d204404a1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -189,7 +189,6 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select ARCH_HAS_SCALED_CPUTIME - select VIRT_TO_BUS select HAVE_NMI -- GitLab From 4ae987894c06056e29264e5a1051e459a4454a61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Apr 2019 10:35:54 +0200 Subject: [PATCH 1849/1861] s390: fix clang -Wpointer-sign warnings in boot code The arch/s390/boot directory is built with its own set of compiler options that does not include -Wno-pointer-sign like the rest of the kernel does; this causes a lot of harmless but correct warnings when building with clang. For the atomics, we can add type casts to avoid the warnings; for everything else the easiest way is to slightly adapt the types to be more consistent. Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/boot/ipl_parm.c | 2 +- arch/s390/include/asm/bitops.h | 12 ++++++------ arch/s390/include/asm/ebcdic.h | 2 +- arch/s390/include/asm/lowcore.h | 2 +- drivers/s390/char/sclp.h | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index b49bd97d33af8..3c49bde8aa5e3 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -56,7 +56,7 @@ void store_ipl_parmblock(void) ipl_block_valid = 1; } -static size_t scpdata_length(const char *buf, size_t count) +static size_t scpdata_length(const u8 *buf, size_t count) { while (count) { if (buf[count - 1] != '\0' && buf[count - 1] != ' ') diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index d1f8a4d94cca0..9900d655014cc 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -73,7 +73,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr) } #endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); - __atomic64_or(mask, addr); + __atomic64_or(mask, (long *)addr); } static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) @@ -94,7 +94,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) } #endif mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); - __atomic64_and(mask, addr); + __atomic64_and(mask, (long *)addr); } static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) @@ -115,7 +115,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) } #endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); - __atomic64_xor(mask, addr); + __atomic64_xor(mask, (long *)addr); } static inline int @@ -125,7 +125,7 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) unsigned long old, mask; mask = 1UL << (nr & (BITS_PER_LONG - 1)); - old = __atomic64_or_barrier(mask, addr); + old = __atomic64_or_barrier(mask, (long *)addr); return (old & mask) != 0; } @@ -136,7 +136,7 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) unsigned long old, mask; mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); - old = __atomic64_and_barrier(mask, addr); + old = __atomic64_and_barrier(mask, (long *)addr); return (old & ~mask) != 0; } @@ -147,7 +147,7 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) unsigned long old, mask; mask = 1UL << (nr & (BITS_PER_LONG - 1)); -
old = __atomic64_xor_barrier(mask, addr); + old = __atomic64_xor_barrier(mask, (long *)addr); return (old & mask) != 0; } diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index 29441beb92e60..efb50fc6866cd 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -20,7 +20,7 @@ extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ static inline void -codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr) +codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) { if (nr-- <= 0) return; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5b9f10b1e55de..237ee0c4169f7 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -129,7 +129,7 @@ struct lowcore { /* SMP info area */ __u32 cpu_nr; /* 0x03a0 */ __u32 softirq_pending; /* 0x03a4 */ - __u32 preempt_count; /* 0x03a8 */ + __s32 preempt_count; /* 0x03a8 */ __u32 spinlock_lockval; /* 0x03ac */ __u32 spinlock_index; /* 0x03b0 */ __u32 fpu_flags; /* 0x03b4 */ diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 28b433960831f..196333013e543 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -367,14 +367,14 @@ sclp_ascebc(unsigned char ch) /* translate string from EBCDIC to ASCII */ static inline void -sclp_ebcasc_str(unsigned char *str, int nr) +sclp_ebcasc_str(char *str, int nr) { (MACHINE_IS_VM) ? EBCASC(str, nr) : EBCASC_500(str, nr); } /* translate string from ASCII to EBCDIC */ static inline void -sclp_ascebc_str(unsigned char *str, int nr) +sclp_ascebc_str(char *str, int nr) { (MACHINE_IS_VM) ? ASCEBC(str, nr) : ASCEBC_500(str, nr); } -- GitLab From ce968f6012f632bbe071839d229db77c45fc38d1 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 23 Apr 2019 14:00:56 -0700 Subject: [PATCH 1850/1861] s390/vdso: drop unnecessary cc-ldoption Towards the goal of removing cc-ldoption, it seems that --hash-style= was added to binutils 2.17.50.0.2 in 2006. The minimal required version of binutils for the kernel according to Documentation/process/changes.rst is 2.20. 
Link: https://gcc.gnu.org/ml/gcc/2007-01/msg01141.html Cc: clang-built-linux@googlegroups.com Suggested-by: Masahiro Yamada Signed-off-by: Nick Desaulniers Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso32/Makefile | 2 +- arch/s390/kernel/vdso64/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index e76309fbbcb3b..aee9ffbccb548 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -19,7 +19,7 @@ KBUILD_AFLAGS_31 += -m31 -s KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) + -Wl,--hash-style=both $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index f849ac61c5da0..bec19e7e6e1cf 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -19,7 +19,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) + -Wl,--hash-style=both $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) -- GitLab From 2f1a6fbbef7781382850c3104ecb658f21b5d460 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:45 +1000 Subject: [PATCH 1851/1861] power/suspend: Add function to disable secondaries for suspend This adds a function to disable secondary CPUs for suspend that are not necessarily non-zero / non-boot CPUs. Platforms will be able to use this to suspend using non-zero CPUs. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . 
Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-3-npiggin@gmail.com Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 12 ++++++++++++ kernel/kexec_core.c | 4 ++-- kernel/power/hibernate.c | 12 ++++++------ kernel/power/suspend.c | 4 ++-- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 5041357d0297a..7ab2f09c0a146 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -137,9 +137,21 @@ static inline int disable_nonboot_cpus(void) return freeze_secondary_cpus(0); } extern void enable_nonboot_cpus(void); + +static inline int suspend_disable_secondary_cpus(void) +{ + return freeze_secondary_cpus(0); +} +static inline void suspend_enable_secondary_cpus(void) +{ + return enable_nonboot_cpus(); +} + #else /* !CONFIG_PM_SLEEP_SMP */ static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} +static inline int suspend_disable_secondary_cpus(void) { return 0; } +static inline void suspend_enable_secondary_cpus(void) { } #endif /* !CONFIG_PM_SLEEP_SMP */ void cpu_startup_entry(enum cpuhp_state state); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index d7140447be75b..fd5c95ff9251f 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1150,7 +1150,7 @@ int kernel_kexec(void) error = dpm_suspend_end(PMSG_FREEZE); if (error) goto Resume_devices; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error) goto Enable_cpus; local_irq_disable(); @@ -1183,7 +1183,7 @@ int kernel_kexec(void) Enable_irqs: local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); dpm_resume_start(PMSG_RESTORE); Resume_devices: dpm_resume_end(PMSG_RESTORE); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index abef759de7c8f..cfc7a57049e4c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -281,7 +281,7 @@ static int create_image(int platform_mode) if (error || hibernation_test(TEST_PLATFORM)) goto Platform_finish; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; @@ -323,7 +323,7 @@ static int create_image(int platform_mode) local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Platform_finish: platform_finish(platform_mode); @@ -417,7 +417,7 @@ int hibernation_snapshot(int platform_mode) int __weak hibernate_resume_nonboot_cpu_disable(void) { - return disable_nonboot_cpus(); + return suspend_disable_secondary_cpus(); } /** @@ -486,7 +486,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Cleanup: platform_restore_cleanup(platform_mode); @@ -564,7 +564,7 @@ int hibernation_platform_enter(void) if (error) goto Platform_finish; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error) goto Enable_cpus; @@ -586,7 +586,7 @@ int hibernation_platform_enter(void) local_irq_enable(); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Platform_finish: hibernation_ops->finish(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b6103..59b6def230462 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -428,7 +428,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (suspend_test(TEST_PLATFORM)) goto 
Platform_wake; - error = disable_nonboot_cpus(); + error = suspend_disable_secondary_cpus(); if (error || suspend_test(TEST_CPUS)) goto Enable_cpus; @@ -458,7 +458,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) BUG_ON(irqs_disabled()); Enable_cpus: - enable_nonboot_cpus(); + suspend_enable_secondary_cpus(); Platform_wake: platform_resume_noirq(state); -- GitLab From 9ca12ac04bb7d7cfb28aa549dcd3d15761f15543 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:46 +1000 Subject: [PATCH 1852/1861] kernel/cpu: Allow non-zero CPU to be primary for suspend / kexec freeze This patch provides an arch option, ARCH_SUSPEND_NONZERO_CPU, to opt in to allowing suspend to occur on one of the housekeeping CPUs rather than the hardcoded CPU0. This will allow CPU0 to be a nohz_full CPU with a later change. It may be possible for platforms with hardware/firmware restrictions on suspend/wake to effectively support this by handing off the final stage to CPU0 when kernel housekeeping is no longer required. Another option is to make the housekeeping / nohz_full mask dynamic at runtime, but the complexity could not be justified at this time. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-4-npiggin@gmail.com Signed-off-by: Ingo Molnar --- arch/powerpc/Kconfig | 4 ++++ include/linux/cpu.h | 7 ++++++- kernel/cpu.c | 10 +++++++++- kernel/power/Kconfig | 9 +++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2d0be82c30619..bc98b0e37a105 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -318,6 +318,10 @@ config ARCH_SUSPEND_POSSIBLE (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \ || 44x || 40x +config ARCH_SUSPEND_NONZERO_CPU + def_bool y + depends on PPC_POWERNV || PPC_PSERIES + config PPC_DCR_NATIVE bool diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7ab2f09c0a146..73baab8535c14 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -140,7 +140,12 @@ extern void enable_nonboot_cpus(void); static inline int suspend_disable_secondary_cpus(void) { - return freeze_secondary_cpus(0); + int cpu = 0; + + if (IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) + cpu = -1; + + return freeze_secondary_cpus(cpu); } static inline void suspend_enable_secondary_cpus(void) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 6754f3ecfd943..d1bf6e2b4752f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1199,8 +1200,15 @@ int freeze_secondary_cpus(int primary) int cpu, error = 0; cpu_maps_update_begin(); - if (!cpu_online(primary)) + if (primary == -1) { primary = cpumask_first(cpu_online_mask); + if (!housekeeping_cpu(primary, HK_FLAG_TIMER)) + primary = housekeeping_any_cpu(HK_FLAG_TIMER); + } else { + if (!cpu_online(primary)) + primary = cpumask_first(cpu_online_mask); + } + /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index f8fe57d1022e3..9bbaaab14b36e 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -114,6 +114,15 @@ config PM_SLEEP_SMP depends on PM_SLEEP select HOTPLUG_CPU +config PM_SLEEP_SMP_NONZERO_CPU + def_bool y + depends on PM_SLEEP_SMP +
depends on ARCH_SUSPEND_NONZERO_CPU + ---help--- + If an arch can suspend (for suspend, hibernate, kexec, etc) on a + non-zero numbered CPU, it may define ARCH_SUSPEND_NONZERO_CPU. This + will allow nohz_full mask to include CPU0. + config PM_AUTOSLEEP bool "Opportunistic sleep" depends on PM_SLEEP -- GitLab From 9219565aa89033a9cfdae788c1940473a1253d6c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:47 +1000 Subject: [PATCH 1853/1861] sched/isolation: Require a present CPU in housekeeping mask During housekeeping mask setup, currently a possible CPU is required. That does not guarantee the CPU would be available at boot time, so check to ensure that at least one present CPU is in the mask. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-5-npiggin@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/isolation.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index b02d148e76727..687302051a270 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -65,6 +65,7 @@ void __init housekeeping_init(void) static int __init housekeeping_setup(char *str, enum hk_flags flags) { cpumask_var_t non_housekeeping_mask; + cpumask_var_t tmp; int err; alloc_bootmem_cpumask_var(&non_housekeeping_mask); @@ -75,16 +76,23 @@ static int __init housekeeping_setup(char *str, enum hk_flags flags) return 0; } + alloc_bootmem_cpumask_var(&tmp); if (!housekeeping_flags) { alloc_bootmem_cpumask_var(&housekeeping_mask); cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask); - if (cpumask_empty(housekeeping_mask)) + + cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask); + if (cpumask_empty(tmp)) { + pr_warn("Housekeeping: must include one present CPU, " + "using boot CPU:%d\n", smp_processor_id()); __cpumask_set_cpu(smp_processor_id(), housekeeping_mask); + __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); + } } else { - cpumask_var_t tmp; - - alloc_bootmem_cpumask_var(&tmp); + cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask); + if (cpumask_empty(tmp)) + __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask); if (!cpumask_equal(tmp, housekeeping_mask)) { pr_warn("Housekeeping: nohz_full= must match isolcpus=\n"); @@ -92,8 +100,8 @@ static int __init housekeeping_setup(char *str, enum hk_flags flags) free_bootmem_cpumask_var(non_housekeeping_mask); return 0; } - free_bootmem_cpumask_var(tmp); } + free_bootmem_cpumask_var(tmp); if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) { if (IS_ENABLED(CONFIG_NO_HZ_FULL)) { -- GitLab From 08ae95f4fd3b38b257f5dc7e6507e071c27ba0d5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 11 Apr 2019 13:34:48 +1000 Subject: [PATCH 1854/1861] nohz_full: Allow the boot CPU to be nohz_full Allow the boot CPU/CPU0 to be nohz_full. Have the boot CPU take the do_timer duty during boot until a housekeeping CPU can take over. This is supported when CONFIG_PM_SLEEP_SMP is not configured, or when it is configured and the arch allows suspend on non-zero CPUs. nohz_full has been trialed at a large supercomputer site and found to significantly reduce jitter. 
In order to deploy it in production, they need CPU0 to be nohz_full because their job control system requires the application CPUs to start from 0, and the housekeeping CPUs are placed higher. An equivalent job scheduling that uses CPU0 for housekeeping could be achieved by modifying their system, but it is preferable if nohz_full can support their environment without modification. Signed-off-by: Nicholas Piggin Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20190411033448.20842-6-npiggin@gmail.com Signed-off-by: Ingo Molnar --- kernel/time/tick-common.c | 50 +++++++++++++++++++++++++++++++++++---- kernel/time/tick-sched.c | 34 ++++++++++++++++++-------- 2 files changed, 70 insertions(+), 14 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index df401463a1913..e49e8091f9ac1 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -46,6 +46,14 @@ ktime_t tick_period; * procedure also covers cpu hotplug. */ int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; +#ifdef CONFIG_NO_HZ_FULL +/* + * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns + * tick_do_timer_cpu and it should be taken over by an eligible secondary + * when one comes online. + */ +static int tick_do_timer_boot_cpu __read_mostly = -1; +#endif /* * Debugging: see timer_list.c @@ -167,6 +175,26 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) } } +#ifdef CONFIG_NO_HZ_FULL +static void giveup_do_timer(void *info) +{ + int cpu = *(unsigned int *)info; + + WARN_ON(tick_do_timer_cpu != smp_processor_id()); + + tick_do_timer_cpu = cpu; +} + +static void tick_take_do_timer_from_boot(void) +{ + int cpu = smp_processor_id(); + int from = tick_do_timer_boot_cpu; + + if (from >= 0 && from != cpu) + smp_call_function_single(from, giveup_do_timer, &cpu, 1); +} +#endif + /* * Setup the tick device */ @@ -186,12 +214,26 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - if (!tick_nohz_full_cpu(cpu)) - tick_do_timer_cpu = cpu; - else - tick_do_timer_cpu = TICK_DO_TIMER_NONE; + tick_do_timer_cpu = cpu; + tick_next_period = ktime_get(); tick_period = NSEC_PER_SEC / HZ; +#ifdef CONFIG_NO_HZ_FULL + /* + * The boot CPU may be nohz_full, in which case set + * tick_do_timer_boot_cpu so the first housekeeping + * secondary that comes up will take do_timer from + * us. + */ + if (tick_nohz_full_cpu(cpu)) + tick_do_timer_boot_cpu = cpu; + + } else if (tick_do_timer_boot_cpu != -1 && + !tick_nohz_full_cpu(cpu)) { + tick_take_do_timer_from_boot(); + tick_do_timer_boot_cpu = -1; + WARN_ON(tick_do_timer_cpu != cpu); +#endif } /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0be..4aa917acbe1ca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -121,10 +121,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) * into a long sleep. If two CPUs happen to assign themselves to * this duty, then the jiffies update is still serialized by * jiffies_lock. + * + * If nohz_full is enabled, this should not happen because the + * tick_do_timer_cpu never relinquishes. 
*/ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) - && !tick_nohz_full_cpu(cpu)) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { +#ifdef CONFIG_NO_HZ_FULL + WARN_ON(tick_nohz_full_running); +#endif tick_do_timer_cpu = cpu; + } #endif /* Check, if the jiffies need an update */ @@ -395,8 +401,8 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) static int tick_nohz_cpu_down(unsigned int cpu) { /* - * The boot CPU handles housekeeping duty (unbound timers, - * workqueues, timekeeping, ...) on behalf of full dynticks + * The tick_do_timer_cpu CPU handles housekeeping duty (unbound + * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) @@ -423,12 +429,15 @@ void __init tick_nohz_init(void) return; } - cpu = smp_processor_id(); + if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && + !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { + cpu = smp_processor_id(); - if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { - pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", - cpu); - cpumask_clear_cpu(cpu, tick_nohz_full_mask); + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { + pr_warn("NO_HZ: Clearing %d from nohz_full range " + "for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, tick_nohz_full_mask); + } } for_each_cpu(cpu, tick_nohz_full_mask) @@ -904,8 +913,13 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) /* * Boot safety: make sure the timekeeping duty has been * assigned before entering dyntick-idle mode, + * tick_do_timer_cpu is TICK_DO_TIMER_BOOT */ - if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT)) + return false; + + /* Should not happen for nohz-full */ + if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) return false; } -- GitLab From 6f55967ad9d9752813e36de6d5fdbd19741adfc7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 4 May 2019 17:15:56 +0200 Subject: [PATCH 1855/1861] perf/x86/intel: Fix race in intel_pmu_disable_event() New race in x86_pmu_stop() was introduced by replacing the atomic __test_and_clear_bit() of cpuc->active_mask by separate test_bit() and __clear_bit() calls in the following commit: 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler") The race causes panic for PEBS events with enabled callchains: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:perf_prepare_sample+0x8c/0x530 Call Trace: perf_event_output_forward+0x2a/0x80 __perf_event_overflow+0x51/0xe0 handle_pmi_common+0x19e/0x240 intel_pmu_handle_irq+0xad/0x170 perf_event_nmi_handler+0x2e/0x50 nmi_handle+0x69/0x110 default_do_nmi+0x3e/0x100 do_nmi+0x11a/0x180 end_repeat_nmi+0x16/0x1a RIP: 0010:native_write_msr+0x6/0x20 ... intel_pmu_disable_event+0x98/0xf0 x86_pmu_stop+0x6e/0xb0 x86_pmu_del+0x46/0x140 event_sched_out.isra.97+0x7e/0x160 ... The event is configured to make samples from PEBS drain code, but when it's disabled, we'll go through NMI path instead, where data->callchain will not get allocated and we'll crash: x86_pmu_stop test_bit(hwc->idx, cpuc->active_mask) intel_pmu_disable_event(event) { ... intel_pmu_pebs_disable(event); ... EVENT OVERFLOW -> intel_pmu_handle_irq handle_pmi_common TEST PASSES -> test_bit(bit, cpuc->active_mask)) perf_event_overflow perf_prepare_sample { ... 
if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) data->callchain = perf_callchain(event, regs); CRASH -> size += data->callchain->nr; } ... x86_pmu_disable_event(event) } __clear_bit(hwc->idx, cpuc->active_mask); Fix this by disabling the event itself before clearing the PEBS bit. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Arcari Cc: Jiri Olsa Cc: Lendacky Thomas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler") Link: http://lkml.kernel.org/r/20190504151556.31031-1-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f9451566cd9b2..d35f4775d5f10 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2091,15 +2091,19 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); - if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; } x86_pmu_disable_event(event); + + /* + * Needs to be called after x86_pmu_disable_event, + * so we don't trigger the event without PEBS bit set. + */ + if (unlikely(event->attr.precise_ip)) + intel_pmu_pebs_disable(event); } static void intel_pmu_del_event(struct perf_event *event) -- GitLab From caa841360134f863987f2d4f77b8dc2fbb7596f8 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 4 May 2019 18:11:24 -0700 Subject: [PATCH 1856/1861] x86/mm: Initialize PGD cache during mm initialization Poking-mm initialization might require duplicating the PGD at an early stage. Initialize the PGD cache earlier to prevent boot failures. Reported-by: kernel test robot Signed-off-by: Nadav Amit Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rick Edgecombe Cc: Rik van Riel Cc: Stephen Rothwell Cc: Thomas Gleixner Fixes: 4fc19708b165 ("x86/alternatives: Initialize temporary mm for patching") Link: http://lkml.kernel.org/r/20190505011124.39692-1-namit@vmware.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pgtable.c | 10 ++++++---- include/asm-generic/pgtable.h | 2 ++ init/main.c | 3 +++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7bd01709a0914..c8177045b7d4a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -373,14 +373,14 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm, static struct kmem_cache *pgd_cache; -static int __init pgd_cache_init(void) +void __init pgd_cache_init(void) { /* * When PAE kernel is running as a Xen domain, it does not use * shared kernel pmd. And this requires a whole page for pgd.
 	 */
 	if (!SHARED_KERNEL_PMD)
-		return 0;
+		return;
 
 	/*
 	 * when PAE kernel is not running as a Xen domain, it uses
@@ -390,9 +390,7 @@ static int __init pgd_cache_init(void)
 	 */
 	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
 				      SLAB_PANIC, NULL);
-	return 0;
 }
-core_initcall(pgd_cache_init);
 
 static inline pgd_t *_pgd_alloc(void)
 {
@@ -420,6 +418,10 @@ static inline void _pgd_free(pgd_t *pgd)
 }
 #else
 
+void __init pgd_cache_init(void)
+{
+}
+
 static inline pgd_t *_pgd_alloc(void)
 {
 	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index fa782fba51eeb..75d9d68a6de7a 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1126,6 +1126,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 static inline void init_espfix_bsp(void) { }
 #endif
 
+extern void __init pgd_cache_init(void);
+
 #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
 static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
 {
diff --git a/init/main.c b/init/main.c
index 95dd9406ee31e..9dc2f3b4f7532 100644
--- a/init/main.c
+++ b/init/main.c
@@ -506,6 +506,8 @@ void __init __weak mem_encrypt_init(void) { }
 
 void __init __weak poking_init(void) { }
 
+void __init __weak pgd_cache_init(void) { }
+
 bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
@@ -537,6 +539,7 @@ static void __init mm_init(void)
 	init_espfix_bsp();
 	/* Should be run after espfix64 is set up. */
 	pti_init();
+	pgd_cache_init();
 }
 
 void __init __weak arch_call_rest_init(void)
-- 
GitLab

From e93c9c99a629c61837d5a7fc2120cd2b6c70dbdd Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 5 May 2019 17:42:58 -0700
Subject: [PATCH 1857/1861] Linux 5.1

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 633d1196bf005..26c92f892d24b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Shy Crocodile
 
 # *DOCUMENTATION*
-- 
GitLab

From fdd20ec8786ab2950439c7e78871618f7e51f18b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Mon, 6 May 2019 10:46:41 +0200
Subject: [PATCH 1858/1861] Documentation/features/time: Mark m68k having modern-timekeeping

M68k no longer uses arch_gettimeoffset.
Signed-off-by: Geert Uytterhoeven
---
 Documentation/features/time/modern-timekeeping/arch-support.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/features/time/modern-timekeeping/arch-support.txt b/Documentation/features/time/modern-timekeeping/arch-support.txt
index 2855dfe2464d4..1d46da165b75e 100644
--- a/Documentation/features/time/modern-timekeeping/arch-support.txt
+++ b/Documentation/features/time/modern-timekeeping/arch-support.txt
@@ -15,7 +15,7 @@
     |       h8300: |  ok  |
     |     hexagon: |  ok  |
     |        ia64: |  ok  |
-    |        m68k: | TODO |
+    |        m68k: |  ok  |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
     |       nds32: |  ok  |
-- 
GitLab

From f0996bc2978e02d2ea898101462b960f6119b18f Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Mon, 6 May 2019 13:45:26 +0300
Subject: [PATCH 1859/1861] ubsan: Fix nasty -Wbuiltin-declaration-mismatch GCC-9 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building lib/ubsan.c with gcc-9 results in a ton of nasty warnings like
this one:

  lib/ubsan.c warning: conflicting types for built-in function
  ‘__ubsan_handle_negate_overflow’; expected ‘void(void *, void *)’
  [-Wbuiltin-declaration-mismatch]

The kernel's declarations of __ubsan_handle_*() often use 'unsigned long'
parameters, while GCC declares these parameters as 'void *', hence the
mismatch. Fix this by using 'void *' to match GCC's declarations.

Reported-by: Linus Torvalds
Signed-off-by: Andrey Ryabinin
Fixes: c6d308534aef ("UBSAN: run-time undefined behavior sanity checker")
Cc:
Signed-off-by: Linus Torvalds
---
 lib/ubsan.c | 49 +++++++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/lib/ubsan.c b/lib/ubsan.c
index e4162f59a81cc..1e9e2ab255396 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -86,11 +86,13 @@ static bool is_inline_int(struct type_descriptor *type)
 	return bits <= inline_bits;
 }
 
-static s_max get_signed_val(struct type_descriptor *type, unsigned long val)
+static s_max get_signed_val(struct type_descriptor *type, void *val)
 {
 	if (is_inline_int(type)) {
 		unsigned extra_bits = sizeof(s_max)*8 - type_bit_width(type);
-		return ((s_max)val) << extra_bits >> extra_bits;
+		unsigned long ulong_val = (unsigned long)val;
+
+		return ((s_max)ulong_val) << extra_bits >> extra_bits;
 	}
 
 	if (type_bit_width(type) == 64)
@@ -99,15 +101,15 @@ static s_max get_signed_val(struct type_descriptor *type, unsigned long val)
 	return *(s_max *)val;
 }
 
-static bool val_is_negative(struct type_descriptor *type, unsigned long val)
+static bool val_is_negative(struct type_descriptor *type, void *val)
 {
 	return type_is_signed(type) && get_signed_val(type, val) < 0;
 }
 
-static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val)
+static u_max get_unsigned_val(struct type_descriptor *type, void *val)
 {
 	if (is_inline_int(type))
-		return val;
+		return (unsigned long)val;
 
 	if (type_bit_width(type) == 64)
 		return *(u64 *)val;
@@ -116,7 +118,7 @@ static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val)
 }
 
 static void val_to_string(char *str, size_t size, struct type_descriptor *type,
-			  unsigned long value)
+			  void *value)
 {
 	if (type_is_int(type)) {
 		if (type_bit_width(type) == 128) {
@@ -163,8 +165,8 @@ static void ubsan_epilogue(unsigned long *flags)
 	current->in_ubsan--;
 }
 
-static void handle_overflow(struct overflow_data *data, unsigned long lhs,
-			    unsigned long rhs, char op)
+static void handle_overflow(struct overflow_data *data, void *lhs,
+			    void *rhs, char op)
 {
 	struct type_descriptor *type = data->type;
@@ -191,8 +193,7 @@ static void handle_overflow(struct overflow_data *data, unsigned long lhs,
 }
 
 void __ubsan_handle_add_overflow(struct overflow_data *data,
-				unsigned long lhs,
-				unsigned long rhs)
+				void *lhs, void *rhs)
 {
 
 	handle_overflow(data, lhs, rhs, '+');
@@ -200,23 +201,21 @@ void __ubsan_handle_add_overflow(struct overflow_data *data,
 EXPORT_SYMBOL(__ubsan_handle_add_overflow);
 
 void __ubsan_handle_sub_overflow(struct overflow_data *data,
-				unsigned long lhs,
-				unsigned long rhs)
+				void *lhs, void *rhs)
 {
 	handle_overflow(data, lhs, rhs, '-');
 }
 EXPORT_SYMBOL(__ubsan_handle_sub_overflow);
 
 void __ubsan_handle_mul_overflow(struct overflow_data *data,
-				unsigned long lhs,
-				unsigned long rhs)
+				void *lhs, void *rhs)
 {
 	handle_overflow(data, lhs, rhs, '*');
 }
 EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
 
 void __ubsan_handle_negate_overflow(struct overflow_data *data,
-				unsigned long old_val)
+				void *old_val)
 {
 	unsigned long flags;
 	char old_val_str[VALUE_LENGTH];
@@ -237,8 +236,7 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
 
 void __ubsan_handle_divrem_overflow(struct overflow_data *data,
-				unsigned long lhs,
-				unsigned long rhs)
+				void *lhs, void *rhs)
 {
 	unsigned long flags;
 	char rhs_val_str[VALUE_LENGTH];
@@ -323,7 +321,7 @@ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
 }
 
 void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
-				unsigned long ptr)
+				void *ptr)
 {
 	struct type_mismatch_data_common common_data = {
 		.location = &data->location,
@@ -332,12 +330,12 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
 		.type_check_kind = data->type_check_kind
 	};
 
-	ubsan_type_mismatch_common(&common_data, ptr);
+	ubsan_type_mismatch_common(&common_data, (unsigned long)ptr);
 }
 EXPORT_SYMBOL(__ubsan_handle_type_mismatch);
 
 void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
-				unsigned long ptr)
+				void *ptr)
 {
 	struct type_mismatch_data_common common_data = {
@@ -347,12 +345,12 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
 		.type_check_kind = data->type_check_kind
 	};
 
-	ubsan_type_mismatch_common(&common_data, ptr);
+	ubsan_type_mismatch_common(&common_data, (unsigned long)ptr);
 }
 EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
 
 void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data,
-				unsigned long bound)
+				void *bound)
 {
 	unsigned long flags;
 	char bound_str[VALUE_LENGTH];
@@ -369,8 +367,7 @@ void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data,
 }
 EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive);
 
-void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data,
-				unsigned long index)
+void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
 {
 	unsigned long flags;
 	char index_str[VALUE_LENGTH];
@@ -388,7 +385,7 @@ void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data,
 EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
 
 void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
-				unsigned long lhs, unsigned long rhs)
+				void *lhs, void *rhs)
 {
 	unsigned long flags;
 	struct type_descriptor *rhs_type = data->rhs_type;
@@ -439,7 +436,7 @@ void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
 EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
 
 void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
-				unsigned long val)
+				void *val)
 {
 	unsigned long flags;
 	char val_str[VALUE_LENGTH];
-- 
GitLab

From 9a91ad929f9a719c0c734abe791a27ab9444cd61 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Mon, 6 May 2019 13:45:27 +0300
Subject: [PATCH 1860/1861] ubsan: Remove vla bound checks.

The kernel has been built with -Wvla for some time, so it is not
supposed to have any variable length arrays. Remove the vla bounds
checking from ubsan since it is useless now.

Signed-off-by: Andrey Ryabinin
Signed-off-by: Linus Torvalds
---
 lib/ubsan.c            | 18 ------------------
 lib/ubsan.h            |  5 -----
 scripts/Makefile.ubsan |  1 -
 3 files changed, 24 deletions(-)

diff --git a/lib/ubsan.c b/lib/ubsan.c
index 1e9e2ab255396..c4859c2d2f1b2 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -349,24 +349,6 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
 }
 EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
 
-void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data,
-				void *bound)
-{
-	unsigned long flags;
-	char bound_str[VALUE_LENGTH];
-
-	if (suppress_report(&data->location))
-		return;
-
-	ubsan_prologue(&data->location, &flags);
-
-	val_to_string(bound_str, sizeof(bound_str), data->type, bound);
-	pr_err("variable length array bound value %s <= 0\n", bound_str);
-
-	ubsan_epilogue(&flags);
-}
-EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive);
-
 void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
 {
 	unsigned long flags;
diff --git a/lib/ubsan.h b/lib/ubsan.h
index f4d8d0bd4016f..b8fa83864467f 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -57,11 +57,6 @@ struct nonnull_arg_data {
 	int arg_index;
 };
 
-struct vla_bound_data {
-	struct source_location location;
-	struct type_descriptor *type;
-};
-
 struct out_of_bounds_data {
 	struct source_location location;
 	struct type_descriptor *array_type;
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 38b2b4818e8eb..019771b845c5f 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -3,7 +3,6 @@ ifdef CONFIG_UBSAN
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=shift)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
-      CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound)
      CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
-- 
GitLab

From 423ea3255424b954947d167681b71ded1b8fca53 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 6 May 2019 11:28:23 -0700
Subject: [PATCH 1861/1861] tty: rocket: fix incorrect forward declaration of 'rp_init()'

Make the forward declaration actually match the real function
definition, something that previous versions of gcc had just ignored.

This is another patch to fix new warnings from gcc-9 before I start the
merge window pulls. I don't want to miss legitimate new warnings just
because my system update brought a new compiler with new warnings.

Signed-off-by: Linus Torvalds
---
 drivers/tty/rocket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index b121d8f8f3d7d..27aeca30eeae1 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -266,7 +266,7 @@ MODULE_PARM_DESC(pc104_3, "set interface types for ISA(PC104) board #3 (e.g. pc1
 module_param_array(pc104_4, ulong, NULL, 0);
 MODULE_PARM_DESC(pc104_4, "set interface types for ISA(PC104) board #4 (e.g. pc104_4=232,232,485,485,...");
 
-static int rp_init(void);
+static int __init rp_init(void);
 static void rp_cleanup_module(void);
 
 module_init(rp_init);
-- 
GitLab
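
A note on the 'void *' convention adopted by the two UBSAN patches above: the
compiler passes each operand to the __ubsan_handle_*() callbacks as a 'void *'
that either carries the value inline, when the type fits in a pointer, or
points at the value for wider types. That is why the patched get_signed_val()
has to treat the two cases differently. The following is a minimal,
self-contained userspace sketch of that logic (an illustration only, not
kernel code; it assumes an LP64 target whose compiler provides __int128, such
as gcc or clang on x86-64):

  #include <stdio.h>

  typedef __int128 s_max;

  /*
   * Mirrors the patched get_signed_val(): a value whose type fits in a
   * pointer arrives inline in the pointer itself and must be sign-extended;
   * a wider value is passed by reference.
   */
  static s_max get_signed_val(unsigned int bit_width, void *val)
  {
          if (bit_width <= sizeof(unsigned long) * 8) {
                  unsigned int extra_bits = sizeof(s_max) * 8 - bit_width;
                  unsigned long ulong_val = (unsigned long)val;

                  /* shift up then arithmetic-shift down to sign-extend */
                  return ((s_max)ulong_val) << extra_bits >> extra_bits;
          }
          return *(s_max *)val;   /* wide type: val really is a pointer */
  }

  int main(void)
  {
          /* a 32-bit -1 smuggled inline through the pointer argument */
          void *inline_val = (void *)0xffffffffUL;
          /* a 128-bit value stored in memory and passed by its address */
          s_max wide = -42;

          printf("inline: %lld\n", (long long)get_signed_val(32, inline_val));
          printf("wide:   %lld\n", (long long)get_signed_val(128, &wide));
          return 0;
  }

On a typical x86-64 toolchain this prints -1 and -42: the small value is
recovered by sign extension from the pointer bits, the wide one by
dereference, matching the inline/by-reference split in lib/ubsan.c.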